diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd16ba2..614e968 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,3 +8,11 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files +- repo: local + hooks: + - id: rustfmt + name: rustfmt + description: Check if all files follow the rustfmt style + entry: cargo fmt --all -- --check --color always + language: system + pass_filenames: false diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index ecd21bd..8cc357f 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -1,4 +1,3 @@ - use log::warn; use num_traits::AsPrimitive; @@ -16,7 +15,10 @@ pub struct BTreeChromatogram { } #[derive(Debug, Clone, Copy)] -pub struct ChromatogramArray + AddAssign + Default + AsPrimitive, const NBINS: usize>{ +pub struct ChromatogramArray< + T: Mul + AddAssign + Default + AsPrimitive, + const NBINS: usize, +> { pub chromatogram: [T; NBINS], pub rt_binsize: f32, pub rt_bin_offset: Option, @@ -103,7 +105,7 @@ impl BTreeChromatogram { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { - return None + return None; } // This would be the offset needed to align the two chromatograms @@ -111,7 +113,8 @@ impl BTreeChromatogram { // be bin number `other_vs_self_offset` in other. // This line will also return None if either of the chromatograms // has no bin offset set. - let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; + let other_vs_self_offset = + ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; let (min, max) = self.int_range()?; let (min_o, max_o) = other.int_range()?; @@ -137,7 +140,10 @@ impl BTreeChromatogram { Some(cosine) } - pub fn as_chromatogram_array(&self, center_rt: Option) -> ChromatogramArray { + pub fn as_chromatogram_array( + &self, + center_rt: Option, + ) -> ChromatogramArray { let mut chromatogram_arr = [0.; NUM_LOCAL_CHROMATOGRAM_BINS]; let max_chr_arr_width = NUM_LOCAL_CHROMATOGRAM_BINS as f32 * self.rt_binsize; @@ -151,7 +157,8 @@ impl BTreeChromatogram { // The chromatogram uses the bin size of the chromatogram btree // but re-centers it to the mean RT of the trace if !self.btree.is_empty() { - let int_center = ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32; + let int_center = + ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32; let left_start = int_center - (NUM_LOCAL_CHROMATOGRAM_BINS / 2) as i32; for i in 0..NUM_LOCAL_CHROMATOGRAM_BINS { @@ -168,13 +175,14 @@ impl BTreeChromatogram { } } -impl + AddAssign + Default + AsPrimitive, const NBINS:usize> ChromatogramArray { - +impl + AddAssign + Default + AsPrimitive, const NBINS: usize> + ChromatogramArray +{ pub fn cosine_similarity(&self, other: &Self) -> Option { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { - return None + return None; } // This would be the offset needed to align the two chromatograms @@ -182,7 +190,8 @@ impl + AddAssign + Default + AsPrimitive, const NBINS:us // be bin number `other_vs_self_offset` in other. // This line will also return None if either of the chromatograms // has no bin offset set. - let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; + let other_vs_self_offset = + ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; let mut dot = T::default(); let mut mag_a = T::default(); @@ -247,7 +256,7 @@ mod chromatogram_tests { } #[test] - fn test_chromatogram_array_cosine(){ + fn test_chromatogram_array_cosine() { let mut c = ChromatogramArray:: { chromatogram: [0; 5], rt_binsize: 1., @@ -277,7 +286,6 @@ mod chromatogram_tests { c.chromatogram[4] = 20; let cosine = c.cosine_similarity(&c2).unwrap(); assert!(cosine <= 0.9, "Cosine: {}", cosine); - } #[test] @@ -305,7 +313,6 @@ mod chromatogram_tests { c.add(2., 3); c.add(5., 5); - let mut c2 = BTreeChromatogram::new(1., 1.55); // With bin offset of 1.55 and binsize 1.0, bin 0 is [1.55, 2.55) diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs index ebffcc4..28ed4ce 100644 --- a/src/aggregation/converters.rs +++ b/src/aggregation/converters.rs @@ -1,7 +1,6 @@ - use crate::ms::frames::TimsPeak; -use crate::space::space_generics::NDPointConverter; use crate::space::space_generics::NDPoint; +use crate::space::space_generics::NDPointConverter; // https://github.com/rust-lang/rust/issues/35121 // The never type is not stable yet.... diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index 82ce70b..c18f86c 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -249,8 +249,6 @@ fn _dbscan< let mut seed_set: Vec<&usize> = Vec::new(); seed_set.extend(neighbors); - let mut internal_neighbor_additions = 0; - while let Some(neighbor) = seed_set.pop() { let neighbor_index = *neighbor; if cluster_labels[neighbor_index] == ClusterLabel::Noise { @@ -316,7 +314,6 @@ fn _dbscan< }); local_neighbor_filter_timer.stop(false); - internal_neighbor_additions += local_neighbors.len(); seed_set.extend(local_neighbors); } } @@ -409,6 +406,7 @@ fn reassign_centroid< timer.stop(true); out } + // TODO: rename prefiltered peaks argument! // TODO implement a version that takes a sparse distance matrix. diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 04e8dc4..43ed723 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -1,6 +1,6 @@ +pub mod aggregators; +pub mod chromatograms; +pub mod converters; pub mod dbscan; pub mod ms_denoise; -pub mod converters; -pub mod aggregators; pub mod tracing; -pub mod chromatograms; diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index edf1572..0fad03e 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -4,7 +4,7 @@ use crate::aggregation::dbscan; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; -use crate::ms::frames::FrameQuadWindow; +use crate::ms::frames::FrameSlice; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; use crate::utils; @@ -151,14 +151,14 @@ fn _denoise_dia_frame( .get_dia_frame_window_group(frame.index) .unwrap(); let frame_windows = dia_frame_info - .split_frame(frame, window_group) + .split_frame(&frame, window_group) .expect("Only DIA frames should be passed to this function"); frame_windows .into_iter() .map(|frame_window| { - denoise_frame_window( - frame_window, + denoise_frame_slice( + &frame_window, ims_converter, mz_converter, dia_frame_info, @@ -173,8 +173,8 @@ fn _denoise_dia_frame( .collect::>() } -fn denoise_frame_window( - frame_window: FrameQuadWindow, +fn denoise_frame_slice( + frame_window: &FrameSlice, ims_converter: &timsrust::Scan2ImConverter, mz_converter: &timsrust::Tof2MzConverter, dia_frame_info: &DIAFrameInfo, @@ -203,8 +203,8 @@ fn denoise_frame_window( DenseFrameWindow { frame: denoised_frame, - ims_start: denseframe_window.ims_start, - ims_end: denseframe_window.ims_end, + ims_min: denseframe_window.ims_min, + ims_max: denseframe_window.ims_max, mz_start: denseframe_window.mz_start, mz_end: denseframe_window.mz_end, group_id: denseframe_window.group_id, @@ -306,7 +306,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> { info!("Denoising {} frames", elems.len()); - let frame_window_slices = self.dia_frame_info.split_frame_windows(elems); + let frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); let mut out = Vec::with_capacity(frame_window_slices.len()); for sv in frame_window_slices { let progbar = indicatif::ProgressBar::new(sv.len() as u64); @@ -314,8 +314,8 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> .into_par_iter() .progress_with(progbar) .map(|x| { - denoise_frame_window( - x, + denoise_frame_slice( + &x, &self.ims_converter, &self.mz_converter, &self.dia_frame_info, diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index a76def0..20e7b68 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,18 +1,20 @@ -use crate::aggregation::dbscan::dbscan_generic; use crate::aggregation::aggregators::ClusterAggregator; +use crate::aggregation::chromatograms::{ + BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, +}; +use crate::aggregation::dbscan::dbscan_generic; use crate::ms::frames::DenseFrameWindow; +use crate::space::space_generics::NDBoundary; use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike}; use crate::utils; use crate::utils::RollingSDCalculator; -use crate::space::space_generics::NDBoundary; -use crate::aggregation::chromatograms::{BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS}; +use core::panic; use log::{debug, error, info, warn}; use rayon::iter::IntoParallelIterator; use rayon::prelude::*; +use serde::ser::{SerializeStruct, Serializer}; use serde::{Deserialize, Serialize}; -use serde::ser::{Serializer, SerializeStruct}; -use core::panic; use std::error::Error; use std::io::Write; use std::path::Path; @@ -93,10 +95,12 @@ impl Serialize for BaseTrace { state.serialize_field("chromatogram", &format!("{:?}", chromatogram))?; state.end() } - } -pub fn write_trace_csv(traces: &Vec, path: impl AsRef) -> Result<(), Box> { +pub fn write_trace_csv( + traces: &Vec, + path: impl AsRef, +) -> Result<(), Box> { let mut wtr = csv::Writer::from_path(path).unwrap(); for trace in traces { wtr.serialize(trace)?; @@ -272,7 +276,6 @@ pub fn combine_traces( out } - #[derive(Debug, Clone)] struct TraceAggregator { mz: RollingSDCalculator, @@ -313,7 +316,8 @@ impl ClusterAggregator for TraceAggregator { // The chromatogram is an array centered on the retention time let num_rt_points = self.btree_chromatogram.btree.len(); - let chromatogram: ChromatogramArray = self.btree_chromatogram.as_chromatogram_array(Some(rt)); + let chromatogram: ChromatogramArray = + self.btree_chromatogram.as_chromatogram_array(Some(rt)); // let apex = chromatogram.chromatogram.iter().enumerate().max_by_key(|x| (x.1 * 100.) as i32).unwrap().0; // let apex_offset = (apex as f32 - (NUM_LOCAL_CHROMATOGRAM_BINS as f32 / 2.)) * self.btree_chromatogram.rt_binsize; @@ -391,7 +395,6 @@ impl NDPointConverter for BypassBaseTraceBackConverter { } } - fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec { denseframe_windows .into_iter() @@ -415,7 +418,6 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec bool; - // TODO maybe this can be a builder-> executor pattern fn _combine_single_window_traces( prefiltered_peaks: Vec, @@ -603,10 +605,7 @@ impl NDPointConverter for BaseTraceConverter { fn convert_to_bounds_query<'a>( &self, point: &'a NDPoint<3>, - ) -> ( - NDBoundary<3>, - Option<&'a NDPoint<3>>, - ) { + ) -> (NDBoundary<3>, Option<&'a NDPoint<3>>) { const NUM_DIMENTIONS: usize = 3; // let range_center = (point.values[1] + point.values[2]) / 2.; let mut starts = point.values; @@ -647,9 +646,6 @@ impl NDPointConverter for PseudoScanBackConverter { } } - - - #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct PseudoscanGenerationConfig { pub rt_scaling: f32, @@ -677,7 +673,6 @@ impl Default for PseudoscanGenerationConfig { } } - pub fn combine_pseudospectra( traces: Vec, config: PseudoscanGenerationConfig, @@ -689,7 +684,6 @@ pub fn combine_pseudospectra( rt_scaling: config.rt_scaling.into(), ims_scaling: config.ims_scaling.into(), quad_scaling: config.quad_scaling.into(), - // rt_start_end_ratio: 2., // peak_width_prior: 0.75, }; diff --git a/src/main.rs b/src/main.rs index 165b049..43081d6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -108,24 +108,31 @@ fn main() { } // TODO: consier moving this to the config struct as an implementation. - let out_path_scans = config.output_config.debug_scans_json.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); - let out_traces_path = config.output_config.debug_traces_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); - let out_path_features = config.output_config.out_features_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); + let out_path_scans = config + .output_config + .debug_scans_json + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); + let out_traces_path = config + .output_config + .debug_traces_csv + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); + let out_path_features = config + .output_config + .out_features_csv + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); log::info!("Reading DIA data from: {}", path_use); - let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising( - path_use.clone(), - config.denoise_config, - ); + let (dia_frames, dia_info) = + aggregation::ms_denoise::read_all_dia_denoising(path_use.clone(), config.denoise_config); let cycle_time = dia_info.calculate_cycle_time(); // TODO add here expansion limits - let mut traces = aggregation::tracing::combine_traces( - dia_frames, - config.tracing_config, - cycle_time, - ); + let mut traces = + aggregation::tracing::combine_traces(dia_frames, config.tracing_config, cycle_time); let out = match out_traces_path { Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), @@ -147,23 +154,19 @@ fn main() { // Maybe reparametrize as 1.1 cycle time // TODO add here expansion limits - let mut pseudoscans = aggregation::tracing::combine_pseudospectra( - traces, - config.pseudoscan_generation_config, - ); + let mut pseudoscans = + aggregation::tracing::combine_pseudospectra(traces, config.pseudoscan_generation_config); // Report min/max/average/std and skew for ims and rt // This can probably be a macro ... - let ims_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); + let ims_stats = utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); let ims_sd_stats = utils::get_stats( &pseudoscans .iter() .map(|x| x.ims_std as f64) .collect::>(), ); - let rt_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); + let rt_stats = utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); let rt_sd_stats = utils::get_stats( &pseudoscans .iter() diff --git a/src/ms/frames.rs b/src/ms/frames.rs index a2ec67c..daf8cc8 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -1,4 +1,3 @@ - pub use timsrust::Frame; pub use timsrust::FrameType; pub use timsrust::{ @@ -38,6 +37,25 @@ pub enum SortingOrder { Intensity, } +/// Information on the context of a window in a frame. +/// +/// This adds to a frame slice the context of the what isolation was used +/// to generate the frame slice. +#[derive(Debug, Clone)] +pub struct FrameMsMsWindowInfo { + pub mz_start: f32, + pub mz_end: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, + pub global_quad_row_id: usize, +} + +#[derive(Debug, Clone)] +pub enum MsMsFrameSliceWindowInfo { + WindowGroup(usize), + SingleWindow(FrameMsMsWindowInfo), +} + /// Unprocessed data from a 'Frame' after breaking by quad isolation_window + ims window. /// /// 1. every tof-index + intensity represents a peak. @@ -52,31 +70,68 @@ pub enum SortingOrder { /// calibration) /// /// Frame Example values -/// - Scan offsets. [0,0,0,0,0,3,5,6 ...] n=number of scans -/// - tof indices. [100, 101, 102, 10, 20, 30 ...] len = len(intensities) -/// - intensities. [123, 111, 12 , 3, 4, 1 ...] len = len(tof indices) -/// - index 34 +/// - Scan offsets. `[0,0,0,0,0,3,5,6 ...]` n=number of scans +/// - tof indices. `[100, 101, 102, 10, 20, 30 ...]` len = len(intensities) +/// - intensities. `[123, 111, 12 , 3, 4, 1 ...]` len = len(tof indices) /// - rt 65.34 -/// Additions for FrameQuadWindow: +/// +/// Renamed from the frame: +/// - parent_frame_index 34 // renamed from Frame.index for clarity. +/// +/// Additions for FrameSlice: /// - scan_start 123 // The scan number of the first scan offset in the current window. -/// - group_id 1 // The group id of the current window. -/// - quad_group_id 2 // The quad group id of the current window within the current group. -/// - quad_row_id 3 // The quad row id of the current window within all quad windows. +/// - slice_window_info Some(MsMsFrameSliceWindowInfo::SingleWindow(FrameMsMsWindow)) #[derive(Debug, Clone)] -pub struct FrameQuadWindow { - pub scan_offsets: Vec, - pub tof_indices: Vec, - pub intensities: Vec, - pub index: usize, +pub struct FrameSlice<'a> { + pub scan_offsets: &'a [usize], + pub tof_indices: &'a [u32], + pub intensities: &'a [u32], + pub parent_frame_index: usize, pub rt: f64, pub frame_type: FrameType, // From this point on they are local implementations // Before they are used from the timsrust crate. pub scan_start: usize, - pub group_id: usize, - pub quad_group_id: usize, - pub quad_row_id: usize, + pub slice_window_info: Option, +} + +impl<'a> FrameSlice<'a> { + pub fn slice_frame( + frame: &'a Frame, + scan_start: usize, + scan_end: usize, + slice_window_info: Option, + ) -> FrameSlice<'a> { + let scan_offsets = &frame.scan_offsets[scan_start..=scan_end]; + let scan_start = scan_offsets[0]; + + let indprt_start = scan_offsets[0]; + let indptr_end = *scan_offsets.last().expect("Scan range is empty"); + + let tof_indices = &frame.tof_indices[indprt_start..indptr_end]; + let intensities = &frame.intensities[indprt_start..indptr_end]; + debug_assert!(tof_indices.len() == intensities.len()); + debug_assert!(indptr_end - indprt_start == tof_indices.len() as usize); + #[cfg(debug_assertions)] + { + for i in 1..(scan_offsets.len() - 1) { + debug_assert!(scan_offsets[i] <= scan_offsets[i + 1]); + debug_assert!((scan_offsets[i + 1] - scan_start) <= tof_indices.len() as usize); + } + } + + FrameSlice { + scan_offsets, + tof_indices, + intensities, + parent_frame_index: frame.index, + rt: frame.rt, + frame_type: frame.frame_type, + scan_start, + slice_window_info: slice_window_info, + } + } } #[derive(Debug, Clone)] @@ -91,8 +146,8 @@ pub struct DenseFrame { #[derive(Debug, Clone)] pub struct DenseFrameWindow { pub frame: DenseFrame, - pub ims_start: f32, - pub ims_end: f32, + pub ims_min: f32, + pub ims_max: f32, pub mz_start: f64, pub mz_end: f64, pub group_id: usize, @@ -101,36 +156,60 @@ pub struct DenseFrameWindow { impl DenseFrameWindow { pub fn from_frame_window( - frame_window: FrameQuadWindow, + frame_window: &FrameSlice, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, dia_info: &DIAFrameInfo, ) -> DenseFrameWindow { - let group_id = frame_window.group_id; - let quad_group_id = frame_window.quad_group_id; - let scan_start = frame_window.scan_start; + let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { + None => { + panic!("No window info") + // This branch points to an error in logic ... + // The window info should always be present in this context. + } + Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { + // This branch should be easy to implement for things like synchro pasef... + // Some details to iron out though ... + panic!("Not implemented") + } + Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { + let window_group_id = x.window_group_id; + let ww_quad_group_id = x.within_window_quad_group_id; + let scan_start = frame_window.scan_start; + (window_group_id, ww_quad_group_id, scan_start) + } + }; // NOTE: I am swapping here the 'scan start' to be the `ims_end` because // the first scans have lower 1/k0 values. - let ims_end = ims_converter.convert(scan_start as u32) as f32; - let ims_start = + let ims_max = ims_converter.convert(scan_start as u32) as f32; + let ims_min = ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; - let scan_range: &ScanRange = dia_info - .get_quad_windows(group_id, quad_group_id) - .expect("Quad group id should be valid"); - let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + debug_assert!(ims_max <= ims_min); + + let scan_range: Option<&ScanRange> = + dia_info.get_quad_windows(window_group_id, ww_quad_group_id); + let scan_range = match scan_range { + Some(x) => x, + None => { + panic!( + "No scan range for window_group_id: {}, within_window_quad_group_id: {}", + window_group_id, ww_quad_group_id + ); + } + }; - debug_assert!(ims_start <= ims_end); + let frame = DenseFrame::from_frame_window(&frame_window, ims_converter, mz_converter); DenseFrameWindow { frame, - ims_start, - ims_end, + ims_min, + ims_max, mz_start: scan_range.iso_low as f64, mz_end: scan_range.iso_high as f64, - group_id, - quad_group_id, + group_id: window_group_id, + quad_group_id: ww_quad_group_id, } } } @@ -183,7 +262,7 @@ impl DenseFrame { } pub fn from_frame_window( - frame_window: FrameQuadWindow, + frame_window: &FrameSlice, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, ) -> DenseFrame { @@ -193,7 +272,7 @@ impl DenseFrame { let num_tofs = index_offset - last_scan_offset; let scan_index_use = (scan_index + frame_window.scan_start) as u32; - let ims = ims_converter.convert(scan_index_use) as f32; + let ims = ims_converter.convert(scan_index as f64) as f32; if ims < 0.0 { info!("Negative IMS value: {}", ims); info!("scan_index_use: {}", scan_index_use); @@ -204,7 +283,7 @@ impl DenseFrame { expanded_scan_indices.extend(vec![ims; num_tofs as usize]); last_scan_offset = *index_offset; } - debug_assert!(last_scan_offset == frame_window.tof_indices.len() as u64); + debug_assert!(last_scan_offset == frame_window.tof_indices.len()); let peaks = expanded_scan_indices .iter() @@ -224,7 +303,7 @@ impl DenseFrame { } } - let index = frame_window.index; + let index = frame_window.parent_frame_index; let rt = frame_window.rt; let frame_type = frame_window.frame_type; diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 2d5f5c5..87d44b3 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,13 +1,13 @@ use log::{debug, info, trace}; use sqlx::Pool; -use sqlx::{FromRow, Row, Sqlite, SqlitePool}; -use std::path::{Path}; +use sqlx::{FromRow, Sqlite, SqlitePool}; +use std::path::Path; use timsrust::{ConvertableIndex, Frame}; use tokio; use tokio::runtime::Runtime; -use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; +use crate::ms::frames::{FrameMsMsWindowInfo, FrameSlice, MsMsFrameSliceWindowInfo}; // Diaframemsmsinfo = vec of frame_id -> windowgroup_id // diaframemsmswindows = vec[(windowgroup_id, scanstart, scanend, iso_mz, iso_with, nce)] @@ -24,11 +24,15 @@ pub struct ScanRange { pub ims_end: f32, pub iso_low: f32, pub iso_high: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, } impl ScanRange { pub fn new( row_id: usize, + window_group_id: usize, + within_window_quad_group_id: usize, scan_start: usize, scan_end: usize, iso_mz: f32, @@ -57,13 +61,27 @@ impl ScanRange { ims_end: ims_end as f32, iso_low, iso_high, + window_group_id, + within_window_quad_group_id, + } + } +} + +impl Into for ScanRange { + fn into(self) -> FrameMsMsWindowInfo { + FrameMsMsWindowInfo { + mz_start: self.iso_low, + mz_end: self.iso_high, + window_group_id: self.window_group_id.into(), + within_window_quad_group_id: self.within_window_quad_group_id.into(), + global_quad_row_id: self.row_id.into(), } } } #[derive(Debug, Clone)] pub struct DIAWindowGroup { - pub id: usize, + pub window_group_id: usize, pub scan_ranges: Vec, } @@ -92,8 +110,10 @@ pub struct DIAFrameInfo { impl DIAFrameInfo { pub fn get_dia_frame_window_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { let group_id = self.frame_groups[frame_id]; - group_id?; - self.groups[group_id.unwrap()].as_ref() + match group_id { + None => None, + Some(group_id) => self.groups[group_id].as_ref(), + } } async fn rts_from_tdf_connection(conn: &Pool) -> Result>, sqlx::Error> { @@ -156,55 +176,87 @@ impl DIAFrameInfo { avg_cycle_time } - pub fn split_frame(&self, frame: Frame, window_group: &DIAWindowGroup) -> Result, &'static str> { + pub fn split_frame<'a, 'b>( + &'b self, + frame: &'a Frame, + window_group: &DIAWindowGroup, + ) -> Result, &'static str> + where + 'a: 'b, + { // let group = self // .get_dia_frame_window_group(frame.index) // .expect("Frame not in DIA group, non splittable frame passed to split_frame."); let mut out_frames = Vec::new(); - for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { - scan_range.scan_start; - scan_range.scan_end; - - let scan_offsets_use = - &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)]; - let scan_start = scan_offsets_use[0]; - let mz_indptr_start = scan_offsets_use[0]; - let mz_indptr_end = *scan_offsets_use.last().unwrap(); - - let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); - let intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); - - let frame_window = FrameQuadWindow { - scan_offsets: scan_offsets_use - .iter() - .map(|x| (x - scan_start) as u64) - .collect::>(), - tof_indices: tof_indices_keep, - intensities: intensities_keep, - index: frame.index, - rt: frame.rt, - frame_type: frame.frame_type, - scan_start: scan_range.scan_start, - group_id: window_group.id, - quad_group_id: i, - quad_row_id: scan_range.row_id, - }; - - out_frames.push(frame_window); + for scan_range in window_group.scan_ranges.iter() { + let slice_w_info: MsMsFrameSliceWindowInfo = + MsMsFrameSliceWindowInfo::SingleWindow(scan_range.clone().into()); + let frame_slice = FrameSlice::slice_frame( + &frame, + scan_range.scan_start, + scan_range.scan_end, + Some(slice_w_info), + ); + out_frames.push(frame_slice); + + // TODO remove this old implementation + // for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { + + // scan_range.scan_start; + // scan_range.scan_end; + + // let scan_offsets_use = + // &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)]; + // let scan_start = scan_offsets_use[0]; + // let mz_indptr_start = scan_offsets_use[0]; + // let mz_indptr_end = *scan_offsets_use.last().unwrap(); + + // let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); + // let intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); + + // let frame_window = FrameSlice { + // scan_offsets: scan_offsets_use + // .iter() + // .map(|x| (x - scan_start) as u64) + // .collect::>(), + // tof_indices: tof_indices_keep, + // intensities: intensities_keep, + // index: frame.index, + // rt: frame.rt, + // frame_type: frame.frame_type, + // scan_start: scan_range.scan_start, + // group_id: window_group.id, + // quad_group_id: i, + // quad_row_id: scan_range.row_id, + // }; + + // out_frames.push(frame_window); } Ok(out_frames) } - pub fn split_frame_windows(&self, frames: Vec) -> Vec> { + pub fn split_frame_windows<'a>(&'a self, frames: &'a [Frame]) -> Vec> { let mut out = Vec::new(); - for _ in 0..self.groups.len() { - out.push(Vec::new()); + + match self.grouping_level { + GroupingLevel::WindowGroup => { + for _ in 0..(self.groups.len() + 1) { + out.push(Vec::new()); + } + } + GroupingLevel::QuadWindowGroup => { + for _ in 0..(self.row_to_group.len() + 1) { + out.push(Vec::new()); + } + } } for frame in frames { - let group = self.get_dia_frame_window_group(frame.index).expect("Frame is not in MS2 frames"); + let group = self + .get_dia_frame_window_group(frame.index) + .expect("Frame is not in MS2 frames"); match self.grouping_level { GroupingLevel::WindowGroup => { @@ -212,9 +264,21 @@ impl DIAFrameInfo { //out[group.id].push(frame_window); } GroupingLevel::QuadWindowGroup => { - let frame_windows = self.split_frame(frame, group).expect("Error splitting frame"); + let frame_windows = self + .split_frame(&frame, group) + .expect("Error splitting frame"); for frame_window in frame_windows { - out[frame_window.quad_group_id].push(frame_window); + match &frame_window.slice_window_info { + None => { + panic!("Frame window has no slice window info") + } + Some(MsMsFrameSliceWindowInfo::SingleWindow(scan_range)) => { + out[scan_range.global_quad_row_id].push(frame_window); + } + Some(MsMsFrameSliceWindowInfo::WindowGroup(group)) => { + out[*group].push(frame_window); + } + } } } } @@ -225,150 +289,15 @@ impl DIAFrameInfo { group.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); } - out - } - - pub fn split_dense_frame(&self, mut denseframe: DenseFrame) -> Vec { - let group = self - .get_dia_frame_window_group(denseframe.index) - .expect("Frame not in DIA group"); - - // Steps - // 1. Sort by ims - // 2. Get ims bounds - // 3. Binary search for start and end - denseframe.sort_by_mobility(); - let mut frames = Vec::new(); - let imss = denseframe - .raw_peaks - .iter() - .map(|peak| peak.mobility) - .collect::>(); - for (i, scan_range) in group.scan_ranges.iter().enumerate() { - let start = imss.binary_search_by(|v| { - v.partial_cmp(&scan_range.ims_start) - .expect("Couldn't compare values") - }); - - let start = match start { - Ok(x) => x, - Err(x) => x, - }; - - let end = imss.binary_search_by(|v| { - v.partial_cmp(&scan_range.ims_end) - .expect("Couldn't compare values") - }); - - // i might need to add 1 here to make the range [closed, open) - let end = match end { - Ok(x) => x, - Err(x) => x, - }; - - let frame = DenseFrame { - raw_peaks: denseframe.raw_peaks[start..end].to_vec(), - index: denseframe.index, - rt: denseframe.rt, - frame_type: denseframe.frame_type, - sorted: denseframe.sorted, - }; - - let frame_window = DenseFrameWindow { - frame, - ims_start: scan_range.ims_start, - ims_end: scan_range.ims_end, - mz_start: scan_range.iso_low.into(), - mz_end: scan_range.iso_high.into(), - group_id: group.id, - quad_group_id: i, - }; - frames.push(frame_window); - } - - frames - } - - /// Returns a vector of length equal to the number of groups. - /// Each element is a vector of frames that belong to that group. - fn bundle_by_group(&self, frames: Vec) -> Vec> { - let mut frame_groups = Vec::new(); - for frame in frames { - let group = self.get_dia_frame_window_group(frame.index); - if group.is_none() { - continue; - } - let group = group.unwrap(); - let group_id = group.id; - if frame_groups.len() <= group_id { - frame_groups.resize(group_id + 1, Vec::new()); - } - frame_groups[group_id].push(frame); - } - frame_groups - } - - pub fn split_dense_frames(&self, frames: Vec) -> Vec>> { - info!("Splitting {} frames", frames.len()); - - // Returns a vector of length equal to the number of groups. - // Each element is a vector with length equal to the number of quad groups within - // that group. - // Each element of that vector is a vector of frames that belong to that quad group. - let max_num_quad_groups = self - .groups - .iter() - .map(|group| { - if group.is_none() { - 0 - } else { - group.as_ref().unwrap().scan_ranges.len() - } - }) - .max() - .unwrap(); - - let num_groups = self.groups.len(); - - let mut out = Vec::new(); - for _ in 0..num_groups { - let mut group_vec = Vec::new(); - for _ in 0..max_num_quad_groups { - group_vec.push(Vec::new()); - } - out.push(group_vec); - } - - let bundled_split_frames = self.bundle_by_group(frames); - for (i, frame_bundle) in bundled_split_frames.into_iter().enumerate() { - info!("Processing group {}", i); - for frame in frame_bundle { - let frame_windows = self.split_dense_frame(frame); - for frame_window in frame_windows { - out[i][frame_window.quad_group_id].push(frame_window); + // Debug assert that the frames are sorted by rt + if cfg!(debug_assertions) { + for group in out.iter() { + for i in 0..(group.len() - 1) { + debug_assert!(group[i].rt <= group[i + 1].rt); } } } - let counts = out - .iter() - .map(|group| { - group - .iter() - .map(|quad_group| quad_group.len()) - .collect::>() - }) - .collect::>(); - - trace!("Counts: {:?}", counts); - - for (i, group) in counts.iter().enumerate() { - trace!("Group {}", i); - for (j, quad_group) in group.iter().enumerate() { - trace!(" Quad group {}: {}", j, quad_group); - } - } - out } @@ -377,15 +306,33 @@ impl DIAFrameInfo { scan_group_id: usize, quad_group_id: usize, ) -> Option<&ScanRange> { - let group = self.groups[scan_group_id].as_ref()?; - let quad_group = group.scan_ranges.get(quad_group_id)?; + let group = self.groups[scan_group_id].as_ref(); + let group = match group { + None => { + panic!( + "Group not found for scan group id: {}, in groups n={}", + scan_group_id, + self.groups.len() + ) + } + Some(group) => group, + }; + + let quad_group = group.scan_ranges.get(quad_group_id); + let quad_group = match quad_group { + None => { + panic!( + "Quad group not found for quad group id: {}, in scan_ranges {:?}", + quad_group_id, group.scan_ranges + ) + } + Some(quad_group) => quad_group, + }; + Some(quad_group) } } -// TODO implement splitting frames into dia group+quad groups. -// [usize, math::round::floor(quad_mz_center)] - // Reference for the tables: // CREATE TABLE DiaFrameMsMsInfo ( @@ -408,18 +355,31 @@ impl DIAFrameInfo { #[derive(Clone, FromRow, Debug)] pub struct DiaFrameMsMsWindowInfo { + #[sqlx(rename = "WindowGroup")] pub window_group: i32, + #[sqlx(rename = "ScanNumBegin")] pub scan_num_begin: i32, + #[sqlx(rename = "ScanNumEnd")] pub scan_num_end: i32, + #[sqlx(rename = "IsolationMz")] pub isolation_mz: f32, + #[sqlx(rename = "IsolationWidth")] pub isolation_width: f32, + #[sqlx(rename = "CollisionEnergy")] pub collision_energy: f32, } impl DiaFrameMsMsWindowInfo { - fn into_scan_range(&self, id: usize, scan_converter: &timsrust::Scan2ImConverter) -> ScanRange { + fn into_scan_range( + &self, + id: usize, + quad_id: usize, + scan_converter: &timsrust::Scan2ImConverter, + ) -> ScanRange { ScanRange::new( id, + self.window_group as usize, + quad_id, self.scan_num_begin as usize, self.scan_num_end as usize, self.isolation_mz, @@ -480,10 +440,19 @@ impl FrameInfoBuilder { None => continue, Some(scan_ranges) => scan_ranges, }; + debug!("Scan ranges i={}: {:?}", i, scan_ranges); + if cfg!(debug_assertions) { + for scan_range in scan_ranges.iter() { + debug_assert!(scan_range.window_group_id == i) + } + }; if scan_ranges.is_empty() { continue; } else { - groups_vec_o[i] = Some(DIAWindowGroup { id: i, scan_ranges }); + groups_vec_o[i] = Some(DIAWindowGroup { + window_group_id: i, + scan_ranges, + }); } } @@ -546,7 +515,7 @@ impl FrameInfoBuilder { ); GroupingLevel::WindowGroup } else { - log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diaPASEF?)"); + log::info!("Less than 200 scan ranges detected, using QuadWindowGroup grouping level. (diaPASEF?)"); GroupingLevel::QuadWindowGroup }; @@ -572,8 +541,12 @@ impl FrameInfoBuilder { match &mut group_map_vec[usize_wg] { None => continue, Some(scan_ranges) => { - scan_ranges - .push(window.into_scan_range(scangroup_id, &self.scan_converter)); + let quad_id = scan_ranges.len(); + scan_ranges.push(window.into_scan_range( + scangroup_id, + quad_id, + &self.scan_converter, + )); scangroup_id += 1; } } diff --git a/src/space/quad.rs b/src/space/quad.rs index 2d0b276..6a65c62 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -73,7 +73,7 @@ impl<'a, T> RadiusQuadTree<'a, T> { // This means any sub-division will be smaller than the radius let query_contained = radius_squared > distance_squared; - if (self.points.len() < self.capacity) || query_contained { + if (self.points.len() < self.capacity) || query_contained { self.points.push((point, data)); } else { self.subdivide(); diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index b3c9094..02e30f5 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,4 +1,3 @@ - #[derive(Debug, Clone, Copy)] pub struct NDBoundary { pub starts: [f32; DIMENSIONALITY],