Skip to content

Commit

Permalink
chore(dev): refactor taxon evidence; add panviral production draft
Browse files Browse the repository at this point in the history
  • Loading branch information
esteinig committed Dec 10, 2024
1 parent 51032f0 commit 0e318a8
Show file tree
Hide file tree
Showing 24 changed files with 476 additions and 457 deletions.
83 changes: 41 additions & 42 deletions app/src/lib/utils/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,7 @@ export type TaxonLevel = {
}

export type TaxonEvidence = {
alignment: VircovRecord[],
records: PathogenDetectionRecord[]
}

Expand Down Expand Up @@ -1151,52 +1152,49 @@ export type Kraken2UniqRecord = {
rpm: number
}


export type VircovRecord = {
id: string,
db: string,
tool: string,
taxid: string,
reference: string,
regions: number,
reads: number,
alignments: number,
bases: number,
length: number,
coverage: number,
description: string,
tags: string,
rpm: number
export enum Aligner {
Bowtie2 = "bowtie2",
Minimap2 = "minimap2",
Strobealign = "strobealign"
}

export type VircovScanRemapRecord = {
id: string,
db: string,
tool: string,
reference: string,
reference_length: number,
scan_regions: number,
scan_reads: number,
scan_alignments: number,
scan_bases_covered: number,
scan_coverage: number,
remap_regions: number | null,
remap_reads: number | null,
remap_alignments: number | null,
remap_bases_covered: number | null,
remap_coverage: number | null,
remap_mean_depth: number | null,
consensus_length: number | null,
consensus_missing: number | null,
consensus_completeness: number | null,
taxid: string | null,
name: string | null,
segment: string | null,
reference_description: string,
scan_rpm: number,
remap_rpm: number,

export enum ConsensusAssembler {
Ivar = "Ivar"
}

export type VircovRecord = {
id: string | null;
index: string | null;
aligner: Aligner | null;
assembler: ConsensusAssembler | null;
bin: string | null;
name: string | null;
segment: string | null;
taxid: string | null;
reference: string;
reference_length: number;
scan_regions: number;
scan_reads: number;
scan_alignments: number;
scan_bases_covered: number;
scan_coverage: number;
remap_regions: number | null;
remap_reads: number | null;
remap_alignments: number | null;
remap_bases_covered: number | null;
remap_coverage: number | null;
remap_depth: number | null;
remap_depth_coverage: number | null;
consensus_length: number | null;
consensus_missing: number | null;
consensus_completeness: number | null;
consensus_alignments_mapq: number | null;
consensus_coverage_mapq: number | null;
reference_description: string;
};


export type BlastLcaRecord = {
id: string,
db: string,
Expand Down Expand Up @@ -1941,6 +1939,7 @@ export enum FileTag {
POS = "POS",
NEG = "NEG",
NTC = "NTC",
TMP = "TMP",
ENV = "ENV",
HOST = "HOST"
}
Expand Down
8 changes: 6 additions & 2 deletions cerebro/lib/model/src/api/cerebro/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,14 @@ impl SampleConfig {
/// Create a minimal sample configuration with defaults
pub fn with_default(id: &str) -> Result<Self, ModelError> {

let (id, tags) = get_sample_regex_matches(id)?;
let (mut sample_id, tags) = get_sample_regex_matches(id)?;

if sample_id.is_empty() {
sample_id = id.to_string()
}

Ok(Self {
id: id.to_string(),
id: sample_id,
tags,
..SampleConfig::default()
})
Expand Down
49 changes: 47 additions & 2 deletions cerebro/stack/client/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use cerebro_client::error::HttpClientError;
use cerebro_model::api::towers::schema::RegisterTowerSchema;
use cerebro_model::api::stage::schema::RegisterStagedSampleSchema;
use cerebro_model::api::watchers::schema::RegisterWatcherSchema;
use cerebro_pipe::modules::panviral::Panviral;
use cerebro_pipe::modules::pathogen::PathogenDetection;
use cerebro_pipe::modules::quality::QualityControl;
use clap::Parser;
Expand Down Expand Up @@ -48,13 +49,13 @@ fn main() -> anyhow::Result<()> {
client.ping_status()?
},
// Process and upload sample model to database
Commands::UploadSample( args ) => {
Commands::UploadPathogen( args ) => {

let quality = QualityControl::from_json(&args.quality)?;
let pathogen = PathogenDetection::from_json(&args.pathogen)?;

if quality.id != pathogen.id {
return Err(HttpClientError::IdentifiersNotMatched(quality.id, pathogen.id).into())
return Err(HttpClientError::PathogenIdentifiersNotMatched(quality.id, pathogen.id).into())
}

let cerebro = Cerebro::from(
Expand Down Expand Up @@ -89,6 +90,50 @@ fn main() -> anyhow::Result<()> {
)?;
}

},
// Process and upload sample model to database
Commands::UploadPanviral( args ) => {

let quality = QualityControl::from_json(&args.quality)?;
let panviral = Panviral::from_json(&args.panviral)?;

if quality.id != panviral.id {
return Err(HttpClientError::PanviralIdentifiersNotMatched(quality.id, panviral.id).into())
}

let cerebro = Cerebro::from(
&quality.id,
&quality,
&panviral.get_taxa(
&args.taxonomy,
args.strict
)?,
args.sample_sheet.clone(),
args.pipeline_config.clone(),
args.run_id.clone()
)?;

if let Some(model_dir) = &args.model_dir {

if !model_dir.exists() || !model_dir.is_dir() {
create_dir_all(&model_dir)?
}

let output_file = model_dir.join(format!("{}.json", cerebro.name));
log::info!("Writing model to file: {}", output_file.display());
cerebro.write_json(&output_file)?;
}

log::info!("{:?}", cerebro);

if !args.no_upload {
client.upload_models(
&Vec::from([cerebro]),
&args.team_name,
&args.project_name,
args.db_name.as_ref()
)?;
}

},
Commands::UploadModel( args ) => {
Expand Down
4 changes: 3 additions & 1 deletion cerebro/stack/client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,7 @@ pub enum HttpClientError {
RequireProjectNotConfigured,

#[error("failed to match quality control data identifier ({0}) with pathogen detection data identifier ({0})")]
IdentifiersNotMatched(String, String)
PathogenIdentifiersNotMatched(String, String),
#[error("failed to match quality control data identifier ({0}) with panviral data identifier ({0})")]
PanviralIdentifiersNotMatched(String, String)
}
48 changes: 46 additions & 2 deletions cerebro/stack/client/src/terminal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ pub enum Commands {
/// Ping the server as unauthenticated user
PingStatus(StatusArgs),
/// Process and upload pipeline outputs to database
UploadSample(UploadSampleArgs),
UploadPathogen(UploadPathogenArgs),
/// Process and upload pipeline outputs to database
UploadPanviral(UploadPanviralArgs),
/// Upload processed model to database
UploadModel(UploadModelArgs),
/// Summary of taxa evidence for requested models
Expand Down Expand Up @@ -125,7 +127,7 @@ pub struct LoginArgs {


#[derive(Debug, Args)]
pub struct UploadSampleArgs {
pub struct UploadPathogenArgs {
/// Processed pipeline quality control module (.json)
#[clap(long)]
pub quality: PathBuf,
Expand Down Expand Up @@ -164,6 +166,48 @@ pub struct UploadSampleArgs {
pub no_upload: bool,
}



#[derive(Debug, Args)]
pub struct UploadPanviralArgs {
/// Processed pipeline quality control module (.json)
#[clap(long)]
pub quality: PathBuf,
/// Processed pipeline panviral detection module (.json)
#[clap(long)]
pub panviral: PathBuf,
/// Taxonomy directory containing 'nodes.dmp' and 'names.dmp'
#[clap(long)]
pub taxonomy: PathBuf,
/// Raise error if taxid was not found in taxonomy
#[clap(long)]
pub strict: bool,
/// Run identifier if sample sheet is not provided
#[clap(long)]
pub run_id: Option<String>,
/// Pipeline sample sheet (.csv)
#[clap(long)]
pub sample_sheet: Option<PathBuf>,
/// Pipeline configuration (.json)
#[clap(long)]
pub pipeline_config: Option<PathBuf>,
/// Team name for model upload
#[clap(long, short = 't')]
pub team_name: String,
/// Project name for model upload
#[clap(long, short = 'p')]
pub project_name: String,
/// Database name for model upload, otherwise team default database
#[clap(long, short = 'd')]
pub db_name: Option<String>,
/// Output database model as file (.json)
#[clap(long, short = 'o')]
pub model_dir: Option<PathBuf>,
/// Do not upload, use to write the model to --model-dir only
#[clap(long)]
pub no_upload: bool,
}

#[derive(Debug, Args)]
pub struct UploadModelArgs {
/// Processed database models(.json)
Expand Down
11 changes: 10 additions & 1 deletion cerebro/stack/pipe/src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use cerebro_pipe::{modules::{pathogen::{write_pathogen_table, PathogenDetection, PathogenDetectionFilter}, quality::{write_quality_table, QualityControl}}, nextflow::{panviral::PanviralOutput, pathogen::PathogenOutput, quality::{QualityControlFiles, QualityControlOutput}}, terminal::{App, Commands, ProcessCommands, TableCommands, ToolsCommands}, tools::{scan::ScanReads, sheet::SampleSheet, umi::Umi}, utils::init_logger};
use cerebro_pipe::{modules::{panviral::Panviral, pathogen::{write_pathogen_table, PathogenDetection, PathogenDetectionFilter}, quality::{write_quality_table, QualityControl}}, nextflow::{panviral::PanviralOutput, pathogen::PathogenOutput, quality::{QualityControlFiles, QualityControlOutput}}, terminal::{App, Commands, ProcessCommands, TableCommands, ToolsCommands}, tools::{scan::ScanReads, sheet::SampleSheet, umi::Umi}, utils::init_logger};
use clap::Parser;

fn main() -> anyhow::Result<()> {
Expand All @@ -18,10 +18,19 @@ fn main() -> anyhow::Result<()> {
&args.input, args.id.clone(), args.background
)?;
let quality_control = QualityControl::from_panviral(&output);

let panviral = Panviral::from_panviral(
&output,
&quality_control,
args.paired_end
)?;

if let Some(path) = &args.qc {
quality_control.to_json(path)?;
}
if let Some(path) = &args.panviral {
panviral.to_json(path)?;
}

}

Expand Down
6 changes: 5 additions & 1 deletion cerebro/stack/pipe/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ pub enum WorkflowError {
/// Indicates a failure to build a glob from multiple patterns for file matching
#[error("failed to build glob walker for file parsing")]
GlobWalkBuilder,
/// Indicates a failure to parse a traxid from an alignment output sequence description
/// Indicates a failure to parse a taxid from an alignment output sequence description
#[error("failed to parse the taxid field from a Vircov record header (DB: {0})")]
VircovTaxidFieldMissing(String),
/// Indicates failure with JSON serialization
Expand Down Expand Up @@ -324,6 +324,10 @@ pub enum WorkflowError {
#[error("failed to recover tax info from Sylph output lineage: {0}")]
SylphTaxInfoRecoveryFailure(String),

/// Represents a failure to obtain the taxid for an alignment record
/// most likely due to not having a taxid field annotation in Vircov
#[error("taxid annotation missing for alignment record")]
PanviralTaxidAnnotationMissing,
}

#[derive(Error, Debug)]
Expand Down
3 changes: 3 additions & 0 deletions cerebro/stack/pipe/src/modules/assembly.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub struct AssemblyRecord {

}
4 changes: 3 additions & 1 deletion cerebro/stack/pipe/src/modules/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
pub mod quality;
pub mod pathogen;
pub mod pathogen;
pub mod panviral;
pub mod assembly;
Loading

0 comments on commit 0e318a8

Please sign in to comment.