Skip to content

Commit

Permalink
further implementation of delayed denseframe conversion and early splitting and format
Browse files Browse the repository at this point in the history
  • Loading branch information
jspaezp committed Jul 6, 2024
1 parent c89cc6c commit 31e29c1
Show file tree
Hide file tree
Showing 12 changed files with 365 additions and 305 deletions.
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,11 @@ repos:
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: local
hooks:
- id: rustfmt
name: rustfmt
description: Check if all files follow the rustfmt style
entry: cargo fmt --all -- --check --color always
language: system
pass_filenames: false
33 changes: 20 additions & 13 deletions src/aggregation/chromatograms.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

use log::warn;
use num_traits::AsPrimitive;

Expand All @@ -16,7 +15,10 @@ pub struct BTreeChromatogram {
}

#[derive(Debug, Clone, Copy)]
pub struct ChromatogramArray <T: Mul<Output = T> + AddAssign + Default + AsPrimitive<f32>, const NBINS: usize>{
pub struct ChromatogramArray<
T: Mul<Output = T> + AddAssign + Default + AsPrimitive<f32>,
const NBINS: usize,
> {
pub chromatogram: [T; NBINS],
pub rt_binsize: f32,
pub rt_bin_offset: Option<f32>,
Expand Down Expand Up @@ -103,15 +105,16 @@ impl BTreeChromatogram {
// Check that the bin size is almost the same
let binsize_diff = (self.rt_binsize - other.rt_binsize).abs();
if binsize_diff > 0.01 {
return None
return None;
}

// This would be the offset needed to align the two chromatograms
// in terms of bins. In other words bin number 0 in self would
// be bin number `other_vs_self_offset` in other.
// This line will also return None if either of the chromatograms
// has no bin offset set.
let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32;
let other_vs_self_offset =
((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32;

let (min, max) = self.int_range()?;
let (min_o, max_o) = other.int_range()?;
Expand All @@ -137,7 +140,10 @@ impl BTreeChromatogram {
Some(cosine)
}

pub fn as_chromatogram_array(&self, center_rt: Option<f32>) -> ChromatogramArray<f32, NUM_LOCAL_CHROMATOGRAM_BINS> {
pub fn as_chromatogram_array(
&self,
center_rt: Option<f32>,
) -> ChromatogramArray<f32, NUM_LOCAL_CHROMATOGRAM_BINS> {
let mut chromatogram_arr = [0.; NUM_LOCAL_CHROMATOGRAM_BINS];

let max_chr_arr_width = NUM_LOCAL_CHROMATOGRAM_BINS as f32 * self.rt_binsize;
Expand All @@ -151,7 +157,8 @@ impl BTreeChromatogram {
// The chromatogram uses the bin size of the chromatogram btree
// but re-centers it to the mean RT of the trace
if !self.btree.is_empty() {
let int_center = ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32;
let int_center =
((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32;
let left_start = int_center - (NUM_LOCAL_CHROMATOGRAM_BINS / 2) as i32;

for i in 0..NUM_LOCAL_CHROMATOGRAM_BINS {
Expand All @@ -168,21 +175,23 @@ impl BTreeChromatogram {
}
}

impl<T: Mul<Output = T> + AddAssign + Default + AsPrimitive<f32>, const NBINS:usize> ChromatogramArray<T, NBINS> {

impl<T: Mul<Output = T> + AddAssign + Default + AsPrimitive<f32>, const NBINS: usize>
ChromatogramArray<T, NBINS>
{
pub fn cosine_similarity(&self, other: &Self) -> Option<f32> {
// Check that the bin size is almost the same
let binsize_diff = (self.rt_binsize - other.rt_binsize).abs();
if binsize_diff > 0.01 {
return None
return None;
}

// This would be the offset needed to align the two chromatograms
// in terms of bins. In other words bin number 0 in self would
// be bin number `other_vs_self_offset` in other.
// This line will also return None if either of the chromatograms
// has no bin offset set.
let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32;
let other_vs_self_offset =
((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32;

let mut dot = T::default();
let mut mag_a = T::default();
Expand Down Expand Up @@ -247,7 +256,7 @@ mod chromatogram_tests {
}

#[test]
fn test_chromatogram_array_cosine(){
fn test_chromatogram_array_cosine() {
let mut c = ChromatogramArray::<i32, 5> {
chromatogram: [0; 5],
rt_binsize: 1.,
Expand Down Expand Up @@ -277,7 +286,6 @@ mod chromatogram_tests {
c.chromatogram[4] = 20;
let cosine = c.cosine_similarity(&c2).unwrap();
assert!(cosine <= 0.9, "Cosine: {}", cosine);

}

#[test]
Expand Down Expand Up @@ -305,7 +313,6 @@ mod chromatogram_tests {
c.add(2., 3);
c.add(5., 5);


let mut c2 = BTreeChromatogram::new(1., 1.55);
// With bin offset of 1.55 and binsize 1.0, bin 0 is [1.55, 2.55)

Expand Down
3 changes: 1 addition & 2 deletions src/aggregation/converters.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

use crate::ms::frames::TimsPeak;
use crate::space::space_generics::NDPointConverter;
use crate::space::space_generics::NDPoint;
use crate::space::space_generics::NDPointConverter;

// https://github.com/rust-lang/rust/issues/35121
// The never type is not stable yet....
Expand Down
4 changes: 1 addition & 3 deletions src/aggregation/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,6 @@ fn _dbscan<
let mut seed_set: Vec<&usize> = Vec::new();
seed_set.extend(neighbors);

let mut internal_neighbor_additions = 0;

while let Some(neighbor) = seed_set.pop() {
let neighbor_index = *neighbor;
if cluster_labels[neighbor_index] == ClusterLabel::Noise {
Expand Down Expand Up @@ -316,7 +314,6 @@ fn _dbscan<
});
local_neighbor_filter_timer.stop(false);

internal_neighbor_additions += local_neighbors.len();
seed_set.extend(local_neighbors);
}
}
Expand Down Expand Up @@ -409,6 +406,7 @@ fn reassign_centroid<
timer.stop(true);
out
}

// TODO: rename prefiltered peaks argument!
// TODO implement a version that takes a sparse distance matrix.

Expand Down
6 changes: 3 additions & 3 deletions src/aggregation/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub mod aggregators;
pub mod chromatograms;
pub mod converters;
pub mod dbscan;
pub mod ms_denoise;
pub mod converters;
pub mod aggregators;
pub mod tracing;
pub mod chromatograms;
22 changes: 11 additions & 11 deletions src/aggregation/ms_denoise.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::aggregation::dbscan;
use crate::ms::frames::Converters;
use crate::ms::frames::DenseFrame;
use crate::ms::frames::DenseFrameWindow;
use crate::ms::frames::FrameQuadWindow;
use crate::ms::frames::FrameSlice;
use crate::ms::tdf;
use crate::ms::tdf::DIAFrameInfo;
use crate::utils;
Expand Down Expand Up @@ -151,14 +151,14 @@ fn _denoise_dia_frame(
.get_dia_frame_window_group(frame.index)
.unwrap();
let frame_windows = dia_frame_info
.split_frame(frame, window_group)
.split_frame(&frame, window_group)
.expect("Only DIA frames should be passed to this function");

frame_windows
.into_iter()
.map(|frame_window| {
denoise_frame_window(
frame_window,
denoise_frame_slice(
&frame_window,
ims_converter,
mz_converter,
dia_frame_info,
Expand All @@ -173,8 +173,8 @@ fn _denoise_dia_frame(
.collect::<Vec<_>>()
}

fn denoise_frame_window(
frame_window: FrameQuadWindow,
fn denoise_frame_slice(
frame_window: &FrameSlice,
ims_converter: &timsrust::Scan2ImConverter,
mz_converter: &timsrust::Tof2MzConverter,
dia_frame_info: &DIAFrameInfo,
Expand Down Expand Up @@ -203,8 +203,8 @@ fn denoise_frame_window(

DenseFrameWindow {
frame: denoised_frame,
ims_start: denseframe_window.ims_start,
ims_end: denseframe_window.ims_end,
ims_min: denseframe_window.ims_min,
ims_max: denseframe_window.ims_max,
mz_start: denseframe_window.mz_start,
mz_end: denseframe_window.mz_end,
group_id: denseframe_window.group_id,
Expand Down Expand Up @@ -306,16 +306,16 @@ impl<'a> Denoiser<'a, Frame, Vec<DenseFrameWindow>, Converters, Option<usize>>
{
info!("Denoising {} frames", elems.len());

let frame_window_slices = self.dia_frame_info.split_frame_windows(elems);
let frame_window_slices = self.dia_frame_info.split_frame_windows(&elems);
let mut out = Vec::with_capacity(frame_window_slices.len());
for sv in frame_window_slices {
let progbar = indicatif::ProgressBar::new(sv.len() as u64);
let denoised_elements: Vec<DenseFrameWindow> = sv
.into_par_iter()
.progress_with(progbar)
.map(|x| {
denoise_frame_window(
x,
denoise_frame_slice(
&x,
&self.ims_converter,
&self.mz_converter,
&self.dia_frame_info,
Expand Down
34 changes: 14 additions & 20 deletions src/aggregation/tracing.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
use crate::aggregation::dbscan::dbscan_generic;
use crate::aggregation::aggregators::ClusterAggregator;
use crate::aggregation::chromatograms::{
BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS,
};
use crate::aggregation::dbscan::dbscan_generic;
use crate::ms::frames::DenseFrameWindow;
use crate::space::space_generics::NDBoundary;
use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike};
use crate::utils;
use crate::utils::RollingSDCalculator;
use crate::space::space_generics::NDBoundary;
use crate::aggregation::chromatograms::{BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS};

use core::panic;
use log::{debug, error, info, warn};
use rayon::iter::IntoParallelIterator;
use rayon::prelude::*;
use serde::ser::{SerializeStruct, Serializer};
use serde::{Deserialize, Serialize};
use serde::ser::{Serializer, SerializeStruct};
use core::panic;
use std::error::Error;
use std::io::Write;
use std::path::Path;
Expand Down Expand Up @@ -93,10 +95,12 @@ impl Serialize for BaseTrace {
state.serialize_field("chromatogram", &format!("{:?}", chromatogram))?;
state.end()
}

}

pub fn write_trace_csv(traces: &Vec<BaseTrace>, path: impl AsRef<Path>) -> Result<(), Box<dyn Error>> {
pub fn write_trace_csv(
traces: &Vec<BaseTrace>,
path: impl AsRef<Path>,
) -> Result<(), Box<dyn Error>> {
let mut wtr = csv::Writer::from_path(path).unwrap();
for trace in traces {
wtr.serialize(trace)?;
Expand Down Expand Up @@ -272,7 +276,6 @@ pub fn combine_traces(
out
}


#[derive(Debug, Clone)]
struct TraceAggregator {
mz: RollingSDCalculator<f64, u64>,
Expand Down Expand Up @@ -313,7 +316,8 @@ impl ClusterAggregator<TimeTimsPeak, BaseTrace> for TraceAggregator {

// The chromatogram is an array centered on the retention time
let num_rt_points = self.btree_chromatogram.btree.len();
let chromatogram: ChromatogramArray<f32, NUM_LOCAL_CHROMATOGRAM_BINS> = self.btree_chromatogram.as_chromatogram_array(Some(rt));
let chromatogram: ChromatogramArray<f32, NUM_LOCAL_CHROMATOGRAM_BINS> =
self.btree_chromatogram.as_chromatogram_array(Some(rt));

// let apex = chromatogram.chromatogram.iter().enumerate().max_by_key(|x| (x.1 * 100.) as i32).unwrap().0;
// let apex_offset = (apex as f32 - (NUM_LOCAL_CHROMATOGRAM_BINS as f32 / 2.)) * self.btree_chromatogram.rt_binsize;
Expand Down Expand Up @@ -391,7 +395,6 @@ impl NDPointConverter<BaseTrace, 3> for BypassBaseTraceBackConverter {
}
}


fn _flatten_denseframe_vec(denseframe_windows: Vec<DenseFrameWindow>) -> Vec<TimeTimsPeak> {
denseframe_windows
.into_iter()
Expand All @@ -415,7 +418,6 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec<DenseFrameWindow>) -> Vec<Tim
// Needed to specify the generic in dbscan_generic
type FFTimeTimsPeak = fn(&TimeTimsPeak, &TimeTimsPeak) -> bool;


// TODO maybe this can be a builder-> executor pattern
fn _combine_single_window_traces(
prefiltered_peaks: Vec<TimeTimsPeak>,
Expand Down Expand Up @@ -603,10 +605,7 @@ impl NDPointConverter<BaseTrace, 3> for BaseTraceConverter {
fn convert_to_bounds_query<'a>(
&self,
point: &'a NDPoint<3>,
) -> (
NDBoundary<3>,
Option<&'a NDPoint<3>>,
) {
) -> (NDBoundary<3>, Option<&'a NDPoint<3>>) {
const NUM_DIMENTIONS: usize = 3;
// let range_center = (point.values[1] + point.values[2]) / 2.;
let mut starts = point.values;
Expand Down Expand Up @@ -647,9 +646,6 @@ impl NDPointConverter<PseudoSpectrum, 3> for PseudoScanBackConverter {
}
}




#[derive(Debug, Serialize, Deserialize, Clone, Copy)]
pub struct PseudoscanGenerationConfig {
pub rt_scaling: f32,
Expand Down Expand Up @@ -677,7 +673,6 @@ impl Default for PseudoscanGenerationConfig {
}
}


pub fn combine_pseudospectra(
traces: Vec<BaseTrace>,
config: PseudoscanGenerationConfig,
Expand All @@ -689,7 +684,6 @@ pub fn combine_pseudospectra(
rt_scaling: config.rt_scaling.into(),
ims_scaling: config.ims_scaling.into(),
quad_scaling: config.quad_scaling.into(),

// rt_start_end_ratio: 2.,
// peak_width_prior: 0.75,
};
Expand Down
Loading

0 comments on commit 31e29c1

Please sign in to comment.