Skip to content

Commit

Permalink
add 'multi' flag to prevent aggregation. New default is to aggregate
Browse files Browse the repository at this point in the history
signal into 1 matrix.
  • Loading branch information
snystrom committed Oct 17, 2021
1 parent 3969704 commit 8fc3de8
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 13 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[package]
name = "vplot"
version = "0.2.1"
version = "0.2.2"
authors = ["Spencer Nystrom <nystromdev@gmail.com>"]
edition = "2018"

[dependencies]
csv = "*"
bio = "*"
rust-htslib = {version = "*", default-features = false}
rust-htslib = {version = "0.32", default-features = false}
structopt = "*"
ndarray = "0.13.1"
ndarray-csv = "*"
50 changes: 39 additions & 11 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ struct Cli {
max_fragment_size: i64,
/// How reads are counted in the matrix. Using either the midpoint of the fragment, fragment ends, or the whole fragment.
#[structopt(default_value = "midpoint", short = "f", long = "fragment-type", possible_values = &["midpoint", "ends", "fragment"])]
fragment_type: String
fragment_type: String,
/// Instead of aggregating reads into 1 matrix, write 1 matrix for each region.
#[structopt(short = "m", long)]
multi: bool
//TODO: add flag to use bed column as matrix output name

}

Expand Down Expand Up @@ -95,20 +99,34 @@ struct VMatrix {
n_regions: i64,
ncol: i64,
nrow: i64,
max_fragment_size: i64
max_fragment_size: i64,
aggregate: bool
}

fn get_nrow(n_regions: &i64, max_fragment_size: &i64, multi: &bool) -> i64 {
if *multi {
let nrow: i64 = n_regions * max_fragment_size;
return nrow
} else
{
let nrow: i64 = 1 * max_fragment_size;
return nrow
};

}

impl VMatrix {
fn allocate(regions: &VRegions, max_fragment_size: &i64) -> VMatrix {
fn allocate(regions: &VRegions, max_fragment_size: &i64, multi: &bool) -> VMatrix {

let nrow: i64 = regions.n_regions * max_fragment_size;
let nrow = get_nrow(&regions.n_regions, max_fragment_size, multi);

VMatrix {
matrix: VMatrix::_initialize_matrix(nrow, regions.width),
n_regions: regions.n_regions,
ncol: regions.width,
nrow: nrow,
n_regions: regions.n_regions,
max_fragment_size: *max_fragment_size
max_fragment_size: *max_fragment_size,
aggregate: !multi
}

}
Expand Down Expand Up @@ -191,9 +209,9 @@ impl VRegions<'_> {
let (n_regions, width) = VRegions::_get_dims_from_bed(path);

VRegions {
path: path,
n_regions: n_regions,
width: width
path,
n_regions,
width
}
}

Expand All @@ -208,22 +226,28 @@ impl VRegions<'_> {
let mut set_width = 0 as i64;
let mut init = true;
for region in bed_reader.unwrap().records() {
//eprintln!("{:?}", region);
let record = region.unwrap();
n_regions += 1;
// Grab width of first region
// all other regions should be this width
if init {
//eprintln!("{:?}", record);
// TODO: throw error if set.width is negative. should end always be > start?
// maybe also consider parsing the strand column & conditionally enforcing this. ie if "." take abs, if + do end - start, if - do start - end
set_width = (record.end() - record.start()).try_into().unwrap();
init = false;
}

//TODO: throw error if set.width is negative. should end always be > start?
let width = (record.end() - record.start()).try_into().unwrap();

if set_width != width {
panic!("All bed regions must be equal width\nWidth: {} of bed entry on line {} does not match width of first entry: {}", width, n_regions, set_width);
}

}
//TODO: if aggregate == True, n_regions = 1
(n_regions, set_width)
}

Expand All @@ -244,7 +268,7 @@ fn main() {

let mut ventry = VEntry::allocate();

let mut vmatrix = VMatrix::allocate(&regions, &args.max_fragment_size);
let mut vmatrix = VMatrix::allocate(&regions, &args.max_fragment_size, &args.multi);

let mut bed_region_n = 0 as i64;
for bed_region in bed_reader.unwrap().records() {
Expand Down Expand Up @@ -297,9 +321,13 @@ fn main() {
}

// Advance to next bed region
bed_region_n += 1;
// TODO: if -multi = True, increment this, else skip (to aggregate into 1 matrix)
if !vmatrix.aggregate {
bed_region_n += 1;
}
}

// TODO: if multi = True, write 1 matrix per region to file
let mut writer = csv::Writer::from_writer(std::io::stdout());
writer.serialize_array2(&vmatrix.matrix)
.expect("cannot write matrix to stdout");
Expand Down

0 comments on commit 8fc3de8

Please sign in to comment.