added preservation of unclustered peaks
jspaezp committed Apr 19, 2024
1 parent 5db470d commit ab2a309
Showing 7 changed files with 53 additions and 16 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -7,6 +7,8 @@
```
cargo build --release --features par_dataprep
./target/release/peakachu --help
RUST_LOG=info ./target/release/peakachu ...
```
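The new `RUST_LOG=info` invocation works because the crate logs through the `log` facade (see the `log::info!` calls added elsewhere in this commit), so some environment-filtered backend has to be initialized at startup. A minimal sketch of that setup, assuming an `env_logger`-style backend; the logger peakachu actually uses is not shown in this diff:

```rust
// Sketch only: assumes an env_logger-style backend is a dependency; the
// binary's real logger setup is not part of this diff.
fn main() {
    env_logger::init(); // honors RUST_LOG=info, RUST_LOG=debug, etc.
    log::info!("peakachu starting up");
}
```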

## Ideas
4 changes: 2 additions & 2 deletions src/aggregation/chromatograms.rs
@@ -54,7 +54,7 @@ impl BTreeChromatogram {
pub fn add(&mut self, rt: f32, intensity: u64) {
let add_rt = rt + f32::EPSILON;
if self.rt_bin_offset.is_none() {
self.rt_bin_offset = Some(rt);
self.rt_bin_offset = Some(rt - (self.rt_binsize / 2.));
}
let bin = self.rt_to_bin(add_rt);
let entry = self.btree.entry(bin).or_insert(0);
@@ -72,7 +72,7 @@ impl BTreeChromatogram {

fn rt_range(&self) -> Option<(f32, f32)> {
let (min, max) = self.int_range()?;
let bo = self.rt_bin_offset.expect("Bin offset not set");
let _bo = self.rt_bin_offset.expect("Bin offset not set");
Some((self.bin_to_rt(min), self.bin_to_rt(max)))
}

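The change to `rt_bin_offset` centers the first bin on the first retention time seen instead of starting the bin at it. A minimal sketch, using assumed names (`Binner`, `rt_to_bin`, `bin_to_rt`) rather than the crate's actual `BTreeChromatogram`, of why subtracting half a bin width keeps slightly earlier RTs from spilling into a negative bin:

```rust
// Assumed names for illustration; the crate's BTreeChromatogram differs.
// Offsetting by half a bin puts the first RT in the middle of bin 0.
struct Binner {
    binsize: f32,
    offset: f32,
}

impl Binner {
    fn new(first_rt: f32, binsize: f32) -> Self {
        // The commit's change, in miniature: offset = first RT minus half a bin.
        Binner { binsize, offset: first_rt - binsize / 2.0 }
    }

    fn rt_to_bin(&self, rt: f32) -> i32 {
        ((rt - self.offset) / self.binsize).floor() as i32
    }

    fn bin_to_rt(&self, bin: i32) -> f32 {
        // Report the center of the bin.
        self.offset + (bin as f32 + 0.5) * self.binsize
    }
}

fn main() {
    let b = Binner::new(10.0, 0.5);
    assert_eq!(b.rt_to_bin(10.0), 0); // first RT sits mid-bin
    assert_eq!(b.rt_to_bin(9.8), 0);  // slightly earlier RT no longer lands in bin -1
    assert_eq!(b.rt_to_bin(10.3), 1);
    assert!((b.bin_to_rt(0) - 10.0).abs() < 1e-6);
}
```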
20 changes: 20 additions & 0 deletions src/aggregation/dbscan.rs
@@ -438,6 +438,7 @@ pub fn aggregate_clusters<
elements: Vec<T>,
def_aggregator: F,
log_level: utils::LogLevel,
keep_unclustered: bool,
) -> Vec<R> {
let cluster_vecs: Vec<G> = if cfg!(feature = "par_dataprep") {
let mut timer =
@@ -489,11 +490,18 @@
.enumerate()
.filter(|(_, x)| match x {
ClusterLabel::Unassigned => true,
ClusterLabel::Noise => keep_unclustered,
_ => false,
})
.map(|(i, _elem)| i)
.collect();

// if unclustered_elems.len() > 0 {
// log::debug!("Total Orig elems: {}", cluster_labels.len());
// log::debug!("Unclustered elems: {}", unclustered_elems.len());
// log::debug!("Clustered elems: {}", cluster_vecs.len());
// }

let unclustered_elems = unclustered_elems
.iter()
.map(|i| {
@@ -509,6 +517,7 @@
cluster_vecs
} else {
let mut cluster_vecs: Vec<G> = Vec::with_capacity(tot_clusters as usize);
let mut unclustered_points: Vec<G> = Vec::new();
for _ in 0..tot_clusters {
cluster_vecs.push(def_aggregator());
}
Expand All @@ -518,9 +527,17 @@ pub fn aggregate_clusters<
let cluster_idx = *cluster_id as usize - 1;
cluster_vecs[cluster_idx].add(&(elements[point_index]));
}
ClusterLabel::Noise => {
if keep_unclustered {
let mut oe = def_aggregator();
oe.add(&elements[point_index]);
unclustered_points.push(oe);
}
}
_ => {}
}
}
cluster_vecs.extend(unclustered_points);
cluster_vecs
};

@@ -564,6 +581,7 @@ pub fn dbscan_generic<
def_aggregator: F,
extra_filter_fun: Option<&FF>,
log_level: Option<utils::LogLevel>,
keep_unclustered: bool,
) -> Vec<R> {
let show_progress = log_level.is_some();
let log_level = match log_level {
@@ -616,6 +634,7 @@ pub fn dbscan_generic<
prefiltered_peaks,
def_aggregator,
log_level,
keep_unclustered,
)
}

@@ -682,6 +701,7 @@ pub fn dbscan_denseframes(
TimsPeakAggregator::default,
None::<&FFTimsPeak>,
None,
true,
);

frames::DenseFrame {
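The core of this commit is the new `keep_unclustered` flag threaded through `aggregate_clusters`, `dbscan_generic`, and their callers: DBSCAN noise points are no longer discarded but wrapped in single-element aggregators and appended after the real clusters. A simplified, self-contained sketch of that behaviour; the crate's `ClusterLabel`, aggregator trait, and generic signatures are reduced to plain types here:

```rust
// Simplified sketch of the keep_unclustered path; not the crate's generics.
#[derive(Clone, Copy)]
enum Label {
    Unassigned,
    Noise,
    Cluster(usize), // 1-based cluster id, as in the diff above
}

fn aggregate_clusters(
    labels: &[Label],
    elements: &[f64],
    num_clusters: usize,
    keep_unclustered: bool,
) -> Vec<Vec<f64>> {
    let mut clusters: Vec<Vec<f64>> = vec![Vec::new(); num_clusters];
    let mut unclustered: Vec<Vec<f64>> = Vec::new();

    for (point_index, label) in labels.iter().enumerate() {
        match *label {
            Label::Cluster(id) => clusters[id - 1].push(elements[point_index]),
            // New in this commit: noise points survive as single-element "clusters".
            Label::Noise if keep_unclustered => unclustered.push(vec![elements[point_index]]),
            _ => {}
        }
    }
    clusters.extend(unclustered);
    clusters
}

fn main() {
    let labels = [Label::Cluster(1), Label::Noise, Label::Cluster(1), Label::Unassigned];
    let elements = [1.0, 2.0, 3.0, 4.0];
    let out = aggregate_clusters(&labels, &elements, 1, true);
    assert_eq!(out, vec![vec![1.0, 3.0], vec![2.0]]); // noise point 2.0 is preserved
}
```

Note that the callers choose the behaviour per stage: `dbscan_denseframes` passes `true`, while trace combination and pseudospectrum building pass `false`.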
12 changes: 9 additions & 3 deletions src/aggregation/tracing.rs
@@ -322,6 +322,7 @@ struct TraceAggregator {
intensity: u64,
rt: RollingSDCalculator<f64, u64>,
ims: RollingSDCalculator<f64, u64>,
num_rt_peaks: usize,
num_peaks: usize,
quad_low_high: QuadLowHigh,
btree_chromatogram: BTreeChromatogram,
@@ -336,7 +337,8 @@ impl ClusterAggregator<TimeTimsPeak, BaseTrace> for TraceAggregator {
self.rt.add(peak.rt as f64, peak.intensity);
self.ims.add(peak.ims as f64, peak.intensity);
self.btree_chromatogram.add(peak.rt, peak.intensity);
self.num_peaks += 1;
self.num_rt_peaks += 1;
self.num_peaks += peak.n_peaks as usize;
}

fn aggregate(&self) -> BaseTrace {
@@ -397,6 +399,7 @@ impl ClusterAggregator<TimeTimsPeak, BaseTrace> for TraceAggregator {
rt,
ims,
num_peaks: self.num_peaks + other.num_peaks,
num_rt_peaks: self.num_rt_peaks + other.num_rt_peaks,
quad_low_high: self.quad_low_high,
btree_chromatogram,
}
@@ -479,11 +482,13 @@ fn _combine_single_window_traces(
rt: RollingSDCalculator::default(),
ims: RollingSDCalculator::default(),
num_peaks: 0,
num_rt_peaks: 0,
quad_low_high: window_quad_low_high,
btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize.clone()),
},
None::<&FFTimeTimsPeak>,
None,
false,
);

debug!("Combined traces: {}", foo.len());
@@ -667,8 +672,8 @@ pub fn combine_pseudospectra(
// peak_width_prior: 0.75,
};

const IOU_THRESH: f32 = 0.1;
const COSINE_THRESH: f32 = 0.8;
const IOU_THRESH: f32 = 0.01;
const COSINE_THRESH: f32 = 0.7;
let extra_filter_fun = |x: &BaseTrace, y: &BaseTrace| {
let close_in_quad = (x.quad_center - y.quad_center).abs() < 5.0;
if !close_in_quad {
Expand All @@ -691,6 +696,7 @@ pub fn combine_pseudospectra(
PseudoSpectrumAggregator::default,
Some(&extra_filter_fun),
Some(utils::LogLevel::INFO),
false,
);

info!("Combined pseudospectra: {}", foo.len());
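The loosened `IOU_THRESH` (0.1 → 0.01) and `COSINE_THRESH` (0.8 → 0.7) gate whether two traces may be combined: they must overlap enough in retention time and have similar chromatogram shapes. A hedged sketch of that kind of gating; the helper functions below are illustrative assumptions, not the `BaseTrace` methods the crate actually calls:

```rust
// Illustrative helpers only; the crate computes overlap and similarity on its
// own BaseTrace chromatograms.
fn rt_iou(a: (f32, f32), b: (f32, f32)) -> f32 {
    let inter = (a.1.min(b.1) - a.0.max(b.0)).max(0.0);
    let union = (a.1.max(b.1) - a.0.min(b.0)).max(f32::EPSILON);
    inter / union
}

fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot / (na * nb).max(f32::EPSILON)
}

fn mergeable(rt_a: (f32, f32), rt_b: (f32, f32), chr_a: &[f32], chr_b: &[f32]) -> bool {
    // Thresholds as loosened in this commit.
    const IOU_THRESH: f32 = 0.01;
    const COSINE_THRESH: f32 = 0.7;
    rt_iou(rt_a, rt_b) >= IOU_THRESH && cosine(chr_a, chr_b) >= COSINE_THRESH
}

fn main() {
    let a = [1.0, 4.0, 2.0];
    let b = [1.5, 4.5, 1.5];
    println!("{}", mergeable((10.0, 12.0), (11.5, 13.0), &a, &b)); // true
}
```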
4 changes: 3 additions & 1 deletion src/main.rs
@@ -165,6 +165,7 @@ fn main() {
let out_traces_path = out_path_dir.join("chr_traces_debug.csv");

if true {
log::info!("Reading DIA data from: {}", path_use);
let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising(
path_use.clone(),
config.denoise_config.ms2_min_n.into(),
@@ -197,7 +198,8 @@

println!("traces: {:?}", traces.len());
traces.retain(|x| x.num_agg > 5);
traces.retain(|x| x.num_rt_points > 2);
traces.retain(|x| x.num_rt_points >= 2);
println!("traces: {:?}", traces[traces.len()-5]);
println!("traces: {:?}", traces.len());

// Maybe reparametrize as 1.1 cycle time
23 changes: 13 additions & 10 deletions src/scoring.rs
@@ -177,9 +177,9 @@ pub fn score_pseudospectra(
// 1. Build raw spectra from the pseudospectra

let take_top_n = 250;
let min_fragment_mz = 250.;
let min_fragment_mz = 150.;
let max_fragment_mz = 2000.;
let deisotope = true;
let deisotope = false;

let specs = elems
.into_par_iter()
@@ -220,9 +220,9 @@
fragment_min_mz: min_fragment_mz,
fragment_max_mz: max_fragment_mz,
peptide_min_mass: 500.,
peptide_max_mass: 3500.,
peptide_max_mass: 4000.,
ion_kinds: vec![Kind::B, Kind::Y],
min_ion_index: 2,
min_ion_index: 1,
static_mods: (static_mods),
variable_mods: (variable_mods),
max_variable_mods: 1,
@@ -240,27 +240,30 @@

let db = parameters.clone().build(sage_fasta);

let precursor_tolerance = Tolerance::Ppm(-15., 15.);
// Right now the precursor tolerance should be ignored
// because we are using wide window mode for the search.
let precursor_tolerance = Tolerance::Da(-15., 15.);
let scorer = Scorer {
db: &db,
precursor_tol: precursor_tolerance,
fragment_tol: Tolerance::Ppm(-10., 10.),
fragment_tol: Tolerance::Da(-0.02, 0.02),
min_matched_peaks: 3,
min_isotope_err: 0,
max_isotope_err: 0,
min_precursor_charge: 2,
min_precursor_charge: 1,
max_precursor_charge: 4,
max_fragment_charge: Some(1),
max_fragment_charge: Some(2),
min_fragment_mass: 200.,
max_fragment_mass: 4000.,
chimera: true,
report_psms: 2,
chimera: false,
report_psms: 1,
wide_window: true,
annotate_matches: false,
};

let progbar = indicatif::ProgressBar::new(spectra.len() as u64);

log::info!("Scoring pseudospectra ...");
let mut features = spectra
.par_iter()
.progress_with(progbar)
4 changes: 4 additions & 0 deletions src/utils.rs
@@ -349,6 +349,8 @@ pub struct Stats {
pub skew: f64,
pub kurtosis: f64,
pub n: u64,
pub min: Option<f64>,
pub max: Option<f64>,
}

pub fn get_stats(data: &[f64]) -> Stats {
@@ -362,6 +364,8 @@ pub fn get_stats(data: &[f64]) -> Stats {
skew: sd_calc.get_skew(),
kurtosis: sd_calc.get_kurtosis(),
n: sd_calc.n,
min: sd_calc.get_min(),
max: sd_calc.get_max(),
}
}

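`Stats` now also reports optional `min` and `max`, pulled from the rolling calculator. A minimal sketch, assuming a streaming tracker separate from the crate's `RollingSDCalculator`, of how such optional extremes are typically maintained:

```rust
// Assumed stand-in for the rolling calculator: both extremes start as None and
// are updated on every observation, matching the Option<f64> fields above.
#[derive(Default)]
struct RollingMinMax {
    min: Option<f64>,
    max: Option<f64>,
}

impl RollingMinMax {
    fn add(&mut self, x: f64) {
        self.min = Some(self.min.map_or(x, |m| m.min(x)));
        self.max = Some(self.max.map_or(x, |m| m.max(x)));
    }
    fn get_min(&self) -> Option<f64> { self.min }
    fn get_max(&self) -> Option<f64> { self.max }
}

fn main() {
    let mut mm = RollingMinMax::default();
    for x in [3.0, -1.5, 7.2] {
        mm.add(x);
    }
    assert_eq!(mm.get_min(), Some(-1.5));
    assert_eq!(mm.get_max(), Some(7.2));
}
```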
