Skip to content

Commit

Permalink
update dependencies and clean up code in genotype and parse_bam modules
Browse files Browse the repository at this point in the history
  • Loading branch information
wdecoster committed Jan 22, 2025
1 parent 3f3ebf0 commit f44faa4
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 28 deletions.
20 changes: 10 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ edition = "2021"

[dependencies]
clap = { version = "3.2.6", features = ["derive"] }
env_logger = "0.11.3"
flate2 = "1.0.24"
log = "0.4.17"
env_logger = "0.11.6"
flate2 = "1.0.35"
log = "0.4.25"
rayon = "1.5.3"
rust-htslib = { version = "0.47.0", features = ["s3", "curl"] }
rust-htslib = { version = "0.49.0", features = ["s3", "curl"] }
bio = { git = "https://github.com/lorewar2/rust-bio.git", branch = "reverse_edge_fix" }
human-sort = "0.2.2"
minimap2 = { version = "0.1.20", features = ["htslib"] }
kodama = "0.3.0"
regex = "1.8.1"
url = "2.3.1"
regex = "1.11.1"
url = "2.5.4"
distance = "0.4.0"
levenshtein = "1.0.5"
rand = "0.8.5"
libz-sys = "1.1.12"
libz-sys = "1.1.21"
libc = "0.2.147"
petgraph = "0.6.4"
petgraph = "0.7.1"
hts-sys = "2.1.1"
reqwest = { version = "0.12.5", features = ["blocking", "json"] }
indicatif = { version = "0.17.1", features = ["rayon"] }
chrono = "0.4.38"
indicatif = { version = "0.17.9", features = ["rayon"] }
chrono = "0.4.39"

[dev-dependencies]
ctor = "*"
15 changes: 4 additions & 11 deletions src/genotype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,6 @@ fn genotype_repeat(
};

let repeat_compressed_reference = repeat.make_repeat_compressed_sequence(&args.fasta, flanking);
// if args.debug {
// // write the repeat compressed reference to a file
// use std::fs;
// let header = format!(">{chrom}:{start}-{end}\n", chrom = repeat.chrom, start = repeat.start, end = repeat.end);
// let fas = String::from_utf8(repeat_compressed_reference.clone()).expect("Unable to convert repeat compressed reference to string");
// fs::write("repeat_compressed.fa", header + &fas).expect("Unable to write repeat compressed reference to file");
// }

// alignments can be extracted in an unphased manner if the chromosome is marked as --haploid or if --unphased is set
// this means that --haploid overrides any phase information that may be present in the bam file
Expand Down Expand Up @@ -113,7 +106,7 @@ fn genotype_repeat(
.collect::<Vec<&str>>()
.contains(&repeat.chrom.as_str())
{
// if the chromosome is haploid, all reads are put in phase 0
// if the chromosome is haploid, all reads were put in phase 0
let seq = reads.seqs.get(&0).unwrap();
debug!("{repeat}: Haploid: Aligning {} reads", seq.len());
let insertions = find_insertions(seq, &aligner, args.minlen, flanking, repeat);
Expand Down Expand Up @@ -254,7 +247,7 @@ fn genotype_repeat(
// may adapt the function below to allow for multiple alignment methods later
fn find_insertions(
seq: &Vec<Vec<u8>>,
aligner: &Aligner,
aligner: &Aligner<Built>,
minlen: usize,
flanking: u32,
repeat: &crate::repeats::RepeatInterval,
Expand All @@ -263,7 +256,7 @@ fn find_insertions(

// align the reads to the new repeat-compressed reference
for s in seq {
let mapping = aligner.map(s.as_slice(), true, false, None, None).unwrap_or_else(|err| panic!("Unable to align read with seq {s:?} to repeat-compressed reference for {repeat}\n{err}", s=s.to_ascii_uppercase()));
let mapping = aligner.map(s.as_slice(), true, false, None, None, None).unwrap_or_else(|err| panic!("Unable to align read with seq {s:?} to repeat-compressed reference for {repeat}\n{err}", s=s.to_ascii_uppercase()));
for read in mapping {
if let Some(s) = parse_cs(read, minlen, flanking, repeat) {
// slice out inserted sequences from the CS tag
Expand Down Expand Up @@ -382,7 +375,7 @@ mod tests {
.with_cigar()
.with_seq(&repeat_compressed_reference)
.expect("Unable to build index");
let mapping = aligner.map(read.as_slice(), true, false, None, None).unwrap_or_else(|_| panic!("Unable to align read with seq {read:?} to repeat-compressed reference for {repeat}", read=read.to_ascii_uppercase()));
let mapping = aligner.map(read.as_slice(), true, false, None, None, None).unwrap_or_else(|_| panic!("Unable to align read with seq {read:?} to repeat-compressed reference for {repeat}", read=read.to_ascii_uppercase()));

let _insertion = parse_cs(
mapping
Expand Down
2 changes: 1 addition & 1 deletion src/parse_bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub fn get_overlapping_reads(
for (phase, seqs_phase) in seqs.iter() {
let n_reads = seqs_phase.len();
let n_reads_to_select = if n_reads > max_reads_per_phase[&phase] {
max_reads_per_phase[&phase]
max_reads_per_phase[phase]
} else {
n_reads
};
Expand Down
8 changes: 2 additions & 6 deletions src/repeats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,13 @@ impl RepeatInterval {
(self.end + flanking - 2) as usize,
)
.expect("Failed to extract fas_right sequence from fasta for {chrom}:{start}-{end}");

let newref = [fas_left, fas_right].concat();
unsafe { libc::free(fas_left.as_ptr() as *mut std::ffi::c_void) }; // Free up memory (https://github.com/rust-bio/rust-htslib/issues/401#issuecomment-1704290171)
unsafe { libc::free(fas_right.as_ptr() as *mut std::ffi::c_void) }; // Free up memory
newref
[fas_left, fas_right].concat()
}

pub fn reference_repeat_sequence(&self, fasta: &String) -> Option<String> {
let fas = faidx::Reader::from_path(fasta).expect("Failed to read fasta");
let repeat_ref_sequence = std::str::from_utf8(
fas.fetch_seq(&self.chrom, self.start as usize - 1, self.end as usize)
&fas.fetch_seq(&self.chrom, self.start as usize - 1, self.end as usize)
.expect("Failed to extract repeat sequence from fasta for {chrom}:{start}-{end}"),
)
.expect("Failed to convert repeat sequence to string for {chrom}:{start}-{end}")
Expand Down

0 comments on commit f44faa4

Please sign in to comment.