Skip to content

Commit

Permalink
chore(dev): development updates
Browse files Browse the repository at this point in the history
  • Loading branch information
esteinig committed Jun 20, 2024
1 parent 388dc3a commit 3f26d12
Show file tree
Hide file tree
Showing 18 changed files with 681 additions and 817 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,19 @@ This is a preliminary public release of code for the viral enrichment branch of

> Michael A Moso*, George Taiaroa*, Eike Steinig*, Madiyar Zhanduisenov, Grace Butel-Simoes, Ivana Savic, Mona L Taouk, Socheata Chea, Jean Moselen, Jacinta O’Keefe, Jacqueline Prestedge, Georgina L Pollock, Mohammad Khan, Katherine Soloczynskyj, Janath Fernando, Genevieve E Martin, Leon Caly, Ian G Barr, Thomas Tran, Julian Druce, Chuan K Lim, Deborah A Williamson - **Non-SARS-CoV-2 respiratory viral detection and whole genome sequencing from COVID-19 rapid antigen test devices: a laboratory evaluation study** - Lancet Microbe (2024) -[10.1016/S2666-5247(23)00375-0](https://doi.org/10.1016/S2666-5247(23)00375-0)
## Pipeline Testing

```
# Check for errors during development - this will print the startup and completion
# messages to the console and exit the pipeline execution gracefully if no errors
# were found:
nextflow run cerebro/ -profile test_dev
# Check input validation with minimal database configurations for quality control
# with the human reference database index, using the `db_ont` and `db_sr` profiles:
nextflow run cerebro/ -profile db,db_ont,test_io
nextflow run cerebro/ -profile db,db_sr,test_io
```
2 changes: 1 addition & 1 deletion conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- covtobed=1.3.5
- minimap2=2.24
- samtools
- kraken2=2.1.2
- kraken2=2.3
- bracken=2.9.0
- fastp=0.23.2
- nextflow=23.10.1
Expand Down
33 changes: 0 additions & 33 deletions lib/configs/validation.config

This file was deleted.

Empty file added lib/processes/kmcp.nf
Empty file.
20 changes: 10 additions & 10 deletions lib/processes/kraken2.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process Kraken2Uniq {

tag { id }
label "kraken2uniq"
label "kraken2"
tag { "$id : $idx_name" }

publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.kraken2uniq.report", saveAs: { "kmer__kraken2uniq__${idx_name}" }
Expand All @@ -20,7 +20,7 @@ process Kraken2Uniq {
idx_name = kraken2_db.baseName

"""
kraken2 --db $kraken2_db --minimum-hit-groups $params.kraken2_minimum_hit_groups --report-minimizer-data --threads $task.cpus --output ${id}_${idx_name}.kraken2uniq --report ${id}_${idx_name}.kraken2uniq.report --paired $forward $reverse
kraken2 --db $kraken2_db --minimum-hit-groups $params.taxa.kmer.kraken2.minimum_hit_groups --report-minimizer-data --threads $task.cpus --output ${id}_${idx_name}.kraken2uniq --report ${id}_${idx_name}.kraken2uniq.report --paired $forward $reverse
cp ${id}_${idx_name}.kraken2uniq.report kmer__kraken2uniq__${idx_name}
"""

Expand All @@ -29,7 +29,7 @@ process Kraken2Uniq {
process Kraken2Bracken {

tag { id }
label "kraken2bracken"
label "kraken2"
tag { "$id : $idx_name" }

publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.kraken2bracken.report", saveAs: { "kmer__kraken2bracken__${idx_name}" }
Expand All @@ -48,8 +48,8 @@ process Kraken2Bracken {
idx_name = kraken2_db.baseName

"""
kraken2 --db $kraken2_db --confidence $params.kraken2_confidence --threads $task.cpus --output ${id}_${idx_name}.kraken2 --report ${id}_${idx_name}.kraken2.report --paired $forward $reverse
bracken -d $kraken2_db -i ${id}_${idx_name}.kraken2.report -r $params.bracken_read_length -l $params.bracken_taxonomic_level -t $params.bracken_read_threshold -o {id}_${idx_name}.kraken2bracken.report -w sample.breport
kraken2 --db $kraken2_db --confidence $params.taxa.kmer.kraken2.confidence --threads $task.cpus --output ${id}_${idx_name}.kraken2 --report ${id}_${idx_name}.kraken2.report --paired $forward $reverse
bracken -d $kraken2_db -i ${id}_${idx_name}.kraken2.report -r $params.taxa.kmer.bracken.read_length -l $params.taxa.kmer.bracken.taxonomic_level -t $params.taxa.kmer.bracken.read_threshold -o {id}_${idx_name}.kraken2bracken.report -w sample.breport
cp ${id}_${idx_name}.kraken2bracken.report kmer__kraken2bracken__${idx_name}
"""

Expand All @@ -58,7 +58,7 @@ process Kraken2Bracken {
process Kraken2UniqOnt {

tag { id }
label "kraken2uniq"
label "kraken2"
tag { "$id : $idx_name" }

publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.kraken2uniq.report", saveAs: { "kmer__kraken2uniq__${idx_name}" }
Expand All @@ -77,7 +77,7 @@ process Kraken2UniqOnt {
idx_name = kraken2_db.baseName

"""
kraken2 --db $kraken2_db --minimum-hit-groups $params.kraken2_minimum_hit_groups --report-minimizer-data --threads $task.cpus --output ${id}_${idx_name}.kraken2uniq --report ${id}_${idx_name}.kraken2uniq.report $reads
kraken2 --db $kraken2_db --minimum-hit-groups $params.taxa.kmer.kraken2.minimum_hit_groups --report-minimizer-data --threads $task.cpus --output ${id}_${idx_name}.kraken2uniq --report ${id}_${idx_name}.kraken2uniq.report $reads
cp ${id}_${idx_name}.kraken2uniq.report kmer__kraken2uniq__${idx_name}
"""

Expand All @@ -86,7 +86,7 @@ process Kraken2UniqOnt {
process Kraken2BrackenOnt {

tag { id }
label "kraken2bracken"
label "kraken2"
tag { "$id : $idx_name" }

publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.kraken2bracken.report", saveAs: { "kmer__kraken2bracken__${idx_name}" }
Expand All @@ -105,8 +105,8 @@ process Kraken2BrackenOnt {
idx_name = kraken2_db.baseName

"""
kraken2 --db $kraken2_db --confidence $params.kraken2_confidence --threads $task.cpus --output ${id}_${idx_name}.kraken2 --report ${id}_${idx_name}.kraken2.report $reads
bracken -d $kraken2_db -i ${id}_${idx_name}.kraken2.report -r $params.bracken_read_length -l $params.bracken_taxonomic_level -t $params.bracken_read_threshold -o {id}_${idx_name}.kraken2bracken.report -w sample.breport
kraken2 --db $kraken2_db --confidence $params.taxa.kmer.kraken2.confidence --threads $task.cpus --output ${id}_${idx_name}.kraken2 --report ${id}_${idx_name}.kraken2.report $reads
bracken -d $kraken2_db -i ${id}_${idx_name}.kraken2.report -r $params.taxa.kmer.bracken.read_length -l $params.taxa.kmer.bracken.taxonomic_level -t $params.taxa.kmer.bracken.read_threshold -o {id}_${idx_name}.kraken2bracken.report -w sample.breport
cp ${id}_${idx_name}.kraken2bracken.report kmer__kraken2bracken__${idx_name}
"""

Expand Down
73 changes: 73 additions & 0 deletions lib/processes/metabuli.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
 * Classify paired-end reads against each Metabuli database.
 *
 * Inputs:
 *   - tuple of sample id, forward reads and reverse reads
 *   - one Metabuli database directory per task (`each`)
 *
 * Outputs:
 *   - results: tuple of sample id and the report copied to the
 *     `kmer__metabuli__<database>` name
 *   - tuple of the per-sample report and the per-read classification table
 */
process Metabuli {

    label "metabuli"
    tag { "$id : $idx_name" }

    publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.metabuli.report", saveAs: { "kmer__metabuli__${idx_name}" }
    publishDir "$params.outdir/workflow/$params.subdir", mode: "symlink", pattern: "${id}_${idx_name}.metabuli.tsv"
    publishDir "$params.outdir/workflow/$params.subdir", mode: "copy", pattern: "${id}_${idx_name}.metabuli.report"

    input:
    tuple val(id), path(forward), path(reverse)
    each path(metabuli_db)

    output:
    tuple(val(id), path("kmer__metabuli__${idx_name}"), emit: results)
    // The classification table is written with a `.tsv` suffix by the script below.
    tuple path("${id}_${idx_name}.metabuli.report"), path("${id}_${idx_name}.metabuli.tsv")

    script:

    // `task.memory` is a MemoryUnit (no `split` method); convert to whole GiB for
    // `--max-ram` and omit the flag entirely when no memory directive is configured.
    max_ram = task.memory ? "--max-ram ${Math.max(1L, task.memory.toGiga())}" : ""
    idx_name = metabuli_db.baseName

    // Precision thresholds are only passed when the precision mode is enabled and a value is set.
    min_score = params.taxa.metabuli.precision.enabled && params.taxa.metabuli.precision.min_score ? "--min-score $params.taxa.metabuli.precision.min_score" : ""
    min_sp_score = params.taxa.metabuli.precision.enabled && params.taxa.metabuli.precision.min_sp_score ? "--min-sp-score $params.taxa.metabuli.precision.min_sp_score" : ""

    """
    metabuli classify $min_score $min_sp_score $max_ram --threads $task.cpus $forward $reverse $metabuli_db classified/ $id
    mv classified/${id}_classifications.tsv ${id}_${idx_name}.metabuli.tsv
    cp classified/${id}_report.tsv ${id}_${idx_name}.metabuli.report
    cp ${id}_${idx_name}.metabuli.report kmer__metabuli__${idx_name}
    """

}

/*
 * Classify single-file long reads (ONT) against each Metabuli database.
 *
 * Inputs:
 *   - tuple of sample id and a single reads file
 *   - one Metabuli database directory per task (`each`)
 *
 * Outputs:
 *   - results: tuple of sample id and the report copied to the
 *     `kmer__metabuli__<database>` name
 *   - tuple of the per-sample report and the per-read classification table
 */
process MetabuliOnt {

    label "metabuli"
    tag { "$id : $idx_name" }

    publishDir "$params.outdir/results/$id", mode: "copy", pattern: "${id}_${idx_name}.metabuli.report", saveAs: { "kmer__metabuli__${idx_name}" }
    publishDir "$params.outdir/workflow/$params.subdir", mode: "symlink", pattern: "${id}_${idx_name}.metabuli.tsv"
    publishDir "$params.outdir/workflow/$params.subdir", mode: "copy", pattern: "${id}_${idx_name}.metabuli.report"

    input:
    tuple val(id), path(reads)
    each path(metabuli_db)

    output:
    tuple(val(id), path("kmer__metabuli__${idx_name}"), emit: results)
    // The classification table is written with a `.tsv` suffix by the script below.
    tuple path("${id}_${idx_name}.metabuli.report"), path("${id}_${idx_name}.metabuli.tsv")

    script:

    // `task.memory` is a MemoryUnit (no `split` method); convert to whole GiB for
    // `--max-ram` and omit the flag entirely when no memory directive is configured.
    max_ram = task.memory ? "--max-ram ${Math.max(1L, task.memory.toGiga())}" : ""
    idx_name = metabuli_db.baseName

    // Precision thresholds are only passed when the precision mode is enabled and a value is set.
    min_score = params.taxa.metabuli.precision.enabled && params.taxa.metabuli.precision.min_score ? "--min-score $params.taxa.metabuli.precision.min_score" : ""
    min_sp_score = params.taxa.metabuli.precision.enabled && params.taxa.metabuli.precision.min_sp_score ? "--min-sp-score $params.taxa.metabuli.precision.min_sp_score" : ""

    // `--seq-mode 3` selects long-read mode; without it Metabuli assumes paired-end
    // input and would misinterpret the positional arguments for a single reads file.
    """
    metabuli classify --seq-mode 3 $min_score $min_sp_score $max_ram --threads $task.cpus $reads $metabuli_db classified/ $id
    mv classified/${id}_classifications.tsv ${id}_${idx_name}.metabuli.tsv
    cp classified/${id}_report.tsv ${id}_${idx_name}.metabuli.report
    cp ${id}_${idx_name}.metabuli.report kmer__metabuli__${idx_name}
    """

}
62 changes: 53 additions & 9 deletions lib/processes/vircov.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,25 @@ process VircovReferenceSelection {

segment_field = params.vircov_group_select_segment_field ? "--segment-field '$params.vircov_group_select_segment_field'" : ""
segment_field_nan = params.vircov_group_select_segment_field_nan ? "--segment-field-nan '$params.vircov_group_select_segment_field_nan'" : ""
exclude = blacklist ? "--exclude $params.virus_blacklist" : ""
exclude = blacklist ? "--exclude $blacklist" : ""

"""
vircov coverage --alignment $alignment --fasta $alignment_fasta --min-len $params.vircov_scan_min_len --min-cov $params.vircov_scan_min_cov --min-mapq $params.vircov_scan_min_mapq --reads $params.vircov_scan_reads --coverage $params.vircov_scan_coverage --regions $params.vircov_scan_regions --regions-coverage $params.vircov_scan_regions_coverage --group-by "$params.vircov_group_by" --group-sep "$params.vircov_group_sep" --group-select-by "$params.vircov_group_select_by" --group-select-split references --group-select-order --group-select-data "${id}_scan_${db_name}.grouped.tsv" $segment_field $segment_field_nan $exclude > ${id}_scan_${db_name}.select.tsv
vircov coverage --alignment $alignment --fasta $alignment_fasta \
--min-len $params.taxa.alignment.scan.min_len \
--min-cov $params.taxa.alignment.scan.min_cov \
--min-mapq $params.taxa.alignment.scan.min_mapq \
--reads $params.taxa.alignment.scan.min_reads \
--coverage $params.taxa.alignment.scan.min_coverage \
--regions $params.taxa.alignment.scan.min_regions \
--regions-coverage $params.taxa.alignment.scan.min_mapq \
--group-by "$params.taxa.alignment.scan.selection.group_by" \
--group-sep "$params.taxa.alignment.scan.selection.group_sep" \
--group-select-by "$params.taxa.alignment.scan.selection.select_by" \
--group-select-split references \
--group-select-order \
--group-select-data "${id}_scan_${db_name}.grouped.tsv" \
$segment_field $segment_field_nan $exclude > ${id}_scan_${db_name}.select.tsv
cp ${id}_scan_${db_name}.select.tsv align__vircov__${db_name}__scan
"""

Expand Down Expand Up @@ -53,18 +68,32 @@ process VircovReferenceSelectionOnt {

script:

segment_field = params.vircov_group_select_segment_field ? "--segment-field '$params.vircov_group_select_segment_field'" : ""
segment_field_nan = params.vircov_group_select_segment_field_nan ? "--segment-field-nan '$params.vircov_group_select_segment_field_nan'" : ""
segment_field = params.vircov_group_select_segment_field ? "--segment-field '$params.taxa.alignment.scan.selection.segment_field'" : ""
segment_field_nan = params.vircov_group_select_segment_field_nan ? "--segment-field-nan '$params.taxa.alignment.scan.selection.segment_field_nan'" : ""
exclude = blacklist ? "--exclude $params.virus_blacklist" : ""

"""
vircov coverage --alignment $alignment --fasta $alignment_fasta --min-len $params.vircov_scan_min_len --min-cov $params.vircov_scan_min_cov --min-mapq $params.vircov_scan_min_mapq --reads $params.vircov_scan_reads --coverage $params.vircov_scan_coverage --regions $params.vircov_scan_regions --regions-coverage $params.vircov_scan_regions_coverage --group-by "$params.vircov_group_by" --group-sep "$params.vircov_group_sep" --group-select-by "$params.vircov_group_select_by" --group-select-split references --group-select-order --group-select-data "${id}_scan_${db_name}.grouped.tsv" $segment_field $segment_field_nan $exclude > ${id}_scan_${db_name}.select.tsv
vircov coverage --alignment $alignment --fasta $alignment_fasta \
--min-len $params.taxa.alignment.scan.min_len \
--min-cov $params.taxa.alignment.scan.min_cov \
--min-mapq $params.taxa.alignment.scan.min_mapq \
--reads $params.taxa.alignment.scan.min_reads \
--coverage $params.taxa.alignment.scan.min_coverage \
--regions $params.taxa.alignment.scan.min_regions \
--regions-coverage $params.taxa.alignment.scan.min_mapq \
--group-by "$params.taxa.alignment.scan.selection.group_by" \
--group-sep "$params.taxa.alignment.scan.selection.group_sep" \
--group-select-by "$params.taxa.alignment.scan.selection.select_by" \
--group-select-split references \
--group-select-order \
--group-select-data "${id}_scan_${db_name}.grouped.tsv" \
$segment_field $segment_field_nan $exclude > ${id}_scan_${db_name}.select.tsv
cp ${id}_scan_${db_name}.select.tsv align__vircov__${db_name}__scan
"""

}


process VircovRealign {

tag { "$id : $idx_name : $db_name" }
Expand All @@ -85,12 +114,19 @@ process VircovRealign {
script:

"""
vircov coverage --alignment $alignment --fasta $fasta --min-len $params.vircov_remap_min_len --min-cov $params.vircov_remap_min_cov --min-mapq $params.vircov_remap_min_mapq --reads $params.vircov_remap_reads --coverage $params.vircov_remap_coverage --regions $params.vircov_remap_regions --regions-coverage $params.vircov_remap_regions_coverage --read-ids ${id}_${idx_name}.txt -v > ${id}_${idx_name}.tsv
vircov coverage --alignment $alignment --fasta $fasta \
--min-len $params.taxa.alignment.remap.min_len \
--min-cov $params.taxa.alignment.remap.min_cov \
--min-mapq $params.taxa.alignment.remap.min_mapq \
--reads $params.vircov_remap_reads \
--coverage $params.taxa.alignment.remap.min_coverage\
--regions $params.taxa.alignment.remap.min_regions \
--regions-coverage $params.taxa.alignment.remap.min_regions_coverage \
--read-ids ${id}_${idx_name}.txt -v > ${id}_${idx_name}.tsv
"""

}


process VircovRealignOnt {

tag { "$id : $idx_name : $db_name" }
Expand All @@ -111,7 +147,15 @@ process VircovRealignOnt {
script:

"""
vircov coverage --alignment $alignment --fasta $fasta --min-len $params.vircov_remap_min_len --min-cov $params.vircov_remap_min_cov --min-mapq $params.vircov_remap_min_mapq --reads $params.vircov_remap_reads --coverage $params.vircov_remap_coverage --regions $params.vircov_remap_regions --regions-coverage $params.vircov_remap_regions_coverage --read-ids ${id}_${idx_name}.txt -v > ${id}_${idx_name}.tsv
vircov coverage --alignment $alignment --fasta $fasta \
--min-len $params.taxa.alignment.remap.min_len \
--min-cov $params.taxa.alignment.remap.min_cov \
--min-mapq $params.taxa.alignment.remap.min_mapq \
--reads $params.vircov_remap_reads \
--coverage $params.taxa.alignment.remap.min_coverage\
--regions $params.taxa.alignment.remap.min_regions \
--regions-coverage $params.taxa.alignment.remap.min_regions_coverage \
--read-ids ${id}_${idx_name}.txt -v > ${id}_${idx_name}.tsv
"""

}
Expand Down
Loading

0 comments on commit 3f26d12

Please sign in to comment.