From f76dd1980309e32cb91427d719151f8cf7e812d1 Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sat, 7 Oct 2023 21:42:42 +0000 Subject: [PATCH 01/10] incorporate feedback --- CHANGELOG.md | 2 +- README.md | 48 ++++++++------------------- conf/base.config | 2 +- conf/test_full.config | 3 -- modules/local/pyopenms_idfilter.nf | 2 +- modules/local/tdf2mzml.nf | 1 - nextflow.config | 2 +- subworkflows/local/process_feature.nf | 2 -- subworkflows/local/quant.nf | 16 ++++----- workflows/mhcquant.nf | 2 +- 10 files changed, 27 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37bcfb91..3232c035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/04 +## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/09 ### `Added` diff --git a/README.md b/README.md index db37dbc0..fb9bea3d 100644 --- a/README.md +++ b/README.md @@ -32,52 +32,31 @@ On release, automated continuous integration tests run the pipeline on a full-si > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. - +Each row represents a mass spectrometry run in one of the formats: raw, mzML, d Now, you can run the pipeline using: - - ```bash -nextflow run nf-core/mhcquant \ - -profile \ - --input samplesheet.csv \ - --outdir +nextflow run nf-core/mhcquant -profile \ + --input 'samples.tsv' \ + --fasta 'SWISSPROT_2020.fasta' \ + --outdir ./results ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!NOTE] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mhcquant/usage) and the [parameter documentation](https://nf-co.re/mhcquant/parameters). -## Pipeline output - -======= - -```bash -nextflow run nf-core/mhcquant -profile test, \ - --input 'samples.tsv' \ - --fasta 'SWISSPROT_2020.fasta' \ - --outdir ./results -``` - ## Pipeline summary ### Default Steps @@ -136,6 +115,9 @@ Additional functionality contained by the pipeline currently includes: - Predict psm results using mhcflurry to shrink search space (`mhcflurry`) - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`) +> [!WARNING] +> The HLA prediction feature is outdated and will be reworked in the following releases + #### Prediction of HLA class 1 peptides - Predict peptides (`mhcnuggets`, `mhcflurry`, `fred2`) @@ -149,8 +131,6 @@ Additional functionality contained by the pipeline currently includes: ## Documentation -> > > > > > > dev - To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/mhcquant/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/mhcquant/output). 
diff --git a/conf/base.config b/conf/base.config index 389f06d6..ae01d45b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,7 +57,7 @@ process { cache = false } withName:TDF2MZML { - cpus = { check_max( 10 * task.attempt, 'cpus' ) } + cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 10.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } diff --git a/conf/test_full.config b/conf/test_full.config index b5793cee..47945e74 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,10 +17,7 @@ params { // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' - allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv' - predict_class_1 = true - predict_class_2 = true use_deeplc = true use_ms2pip = true ms2pip_model_name = 'CID' diff --git a/modules/local/pyopenms_idfilter.nf b/modules/local/pyopenms_idfilter.nf index cabb8e7b..c4fb4698 100644 --- a/modules/local/pyopenms_idfilter.nf +++ b/modules/local/pyopenms_idfilter.nf @@ -1,6 +1,6 @@ process PYOPENMS_IDFILTER { tag "$meta.id" - label 'process_low' + label 'process_single' conda "bioconda::pyopenms=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/tdf2mzml.nf b/modules/local/tdf2mzml.nf index 0f6f59c3..13ea49fc 100644 --- a/modules/local/tdf2mzml.nf +++ b/modules/local/tdf2mzml.nf @@ -1,6 +1,5 @@ process TDF2MZML { tag "$meta.id" - //label 'process_medium' container "docker.io/mfreitas/tdf2mzml" diff --git a/nextflow.config b/nextflow.config index 2992acaf..a60e8ea3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,7 +71,7 @@ params { // DeepLC settings use_deeplc = false - deeplc_calibration_mode = 'rt_bin' + deeplc_calibration_mode = 'idx_bin' deeplc_calibration_bins = 20 deeplc_add_abs_rt_error = false deeplc_add_sqr_rt_error = false diff --git a/subworkflows/local/process_feature.nf b/subworkflows/local/process_feature.nf index a254285b..176861c7 100644 --- a/subworkflows/local/process_feature.nf +++ b/subworkflows/local/process_feature.nf @@ -6,8 +6,6 @@ include { OPENMS_IDMERGER } from include { OPENMS_FEATUREFINDERIDENTIFICATION } from '../../modules/local/openms_featurefinderidentification' include { OPENMS_FEATURELINKERUNLABELEDKD } from '../../modules/local/openms_featurelinkerunlabeledkd' include { OPENMS_IDCONFLICTRESOLVER } from '../../modules/local/openms_idconflictresolver' -include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANT } from '../../modules/local/openms_textexporter' -include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT } from '../../modules/local/openms_mztabexporter' workflow PROCESS_FEATURE { take: diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf index baea53e8..64c3d41d 100644 --- a/subworkflows/local/quant.nf +++ b/subworkflows/local/quant.nf @@ -45,7 +45,7 @@ workflow QUANT { // NOTE: This is an alternative filtering method that will be replaced by IDFilter with new release of OpenMS PYOPENMS_IDFILTER( ch_runs_to_be_filtered ).filtered .map { meta, idxml -> [[id:meta.sample + '_' + meta.condition], [id:meta.id, file:idxml]] } - .groupTuple(sort: sortById) + .groupTuple( sort: sortById ) .map { meta, idxml -> 
[meta, idxml.file] } .set { ch_runs_to_be_aligned } ch_versions = ch_versions.mix(PYOPENMS_IDFILTER.out.versions.ifEmpty(null)) @@ -56,21 +56,21 @@ workflow QUANT { mzml, merge_meta_map ) - ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix( MAP_ALIGNMENT.out.versions.ifEmpty(null) ) // We need to merge groupwise the aligned idxml files together to use them as id_ext in featurefinder - OPENMS_IDMERGER_QUANT(MAP_ALIGNMENT.out.aligned_idxml + OPENMS_IDMERGER_QUANT( MAP_ALIGNMENT.out.aligned_idxml .map { meta, aligned_idxml -> [[id: meta.sample + '_' + meta.condition], aligned_idxml] } .groupTuple()) ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions.ifEmpty(null)) // Manipulate channels such that we end up with : [meta, mzml, run_idxml, merged_runs_idxml] MAP_ALIGNMENT.out.aligned_mzml - .join(MAP_ALIGNMENT.out.aligned_idxml) - .map {meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] } - .groupTuple(sort: sortById) - .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file]} - .join(OPENMS_IDMERGER_QUANT.out.idxml) + .join( MAP_ALIGNMENT.out.aligned_idxml ) + .map { meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] } + .groupTuple( sort: sortById ) + .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file] } + .join( OPENMS_IDMERGER_QUANT.out.idxml ) .map { group_meta, meta, mzml, idxml, merged_idxml -> [meta, mzml, idxml, merged_idxml] } .transpose() .set { ch_runs_to_be_quantified } diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index c917941a..787caa60 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -336,7 +336,7 @@ workflow MHCQUANT { if (params.annotate_ions) { // Join the ch_filtered_idxml and the ch_mzml_file - ch_clean_mzml_file.map {meta, mzml -> [[id: meta.sample + '_' + 
meta.condition], mzml] } + ch_clean_mzml_file.map { meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] } .groupTuple() .join(filter_q_value) .set{ ch_ion_annotator_input } From 6877012c74b960a44f2aad58ad42c2672bdb1c7a Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 07:49:21 +0000 Subject: [PATCH 02/10] Movve quant test to test full bc of github limitations --- .github/workflows/ci.yml | 2 +- conf/test_quant.config | 22 ---------------------- nextflow.config | 1 - 3 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 conf/test_quant.config diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5527a539..f8ad5eb4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: # Test latest edge release of Nextflow - NXF_VER: "" NXF_EDGE: "1" - tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_quant", "test_full"] + tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_full"] steps: - name: Check out pipeline code uses: actions/checkout@v2 diff --git a/conf/test_quant.config b/conf/test_quant.config deleted file mode 100644 index 105d4396..00000000 --- a/conf/test_quant.config +++ /dev/null @@ -1,22 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running quantification tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. 
- - Use as follows: - nextflow run nf-core/mhcquant -profile test_quant, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Quantification test profile' - config_profile_description = 'Quantification test dataset to check pipeline function' - - // Input data - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' - - spectrum_batch_size = 2000 -} diff --git a/nextflow.config b/nextflow.config index a60e8ea3..7bcca384 100644 --- a/nextflow.config +++ b/nextflow.config @@ -234,7 +234,6 @@ profiles { test_deeplc { includeConfig 'conf/test_deeplc.config' } test_ms2pip { includeConfig 'conf/test_ms2pip.config' } test_ionannotator { includeConfig 'conf/test_ionannotator.config' } - test_quant { includeConfig 'conf/test_quant.config' } test_full { includeConfig 'conf/test_full.config' } } From f76fb7f7d0fa34fbca0bda341f2d1e2dd26505ab Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 08:09:49 +0000 Subject: [PATCH 03/10] move quant test solely to test_full --- conf/test_full.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/test_full.config b/conf/test_full.config index 47945e74..b5793cee 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,7 +17,10 @@ params { // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' + allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv' + predict_class_1 = true + predict_class_2 = true use_deeplc = true use_ms2pip = true ms2pip_model_name = 'CID' From 2bf573d14f577fef92f54e3c26178cdddd1efa05 Mon Sep 17 00:00:00 2001 
From: jonasscheid Date: Sun, 8 Oct 2023 08:12:12 +0000 Subject: [PATCH 04/10] update warning for refine fdr --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index fb9bea3d..701d2c20 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,9 @@ Additional functionality contained by the pipeline currently includes: - Retention time prediction (`DeepLC`) - Peak intensity prediction (`MS2PIP`) +> [!WARNING] +> The refine fdr feature will be evaluated on a large benchmark dataset in the following releases. +> Consider it as an experimental feature. #### Refine FDR - This application converts several OpenMS XML formats to mzTab. (`MzTabExporter`) From 090aaa3e4c31aa140ea4e2527fb0029a6f1c4082 Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 08:20:56 +0000 Subject: [PATCH 05/10] prettier --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 701d2c20..a796059b 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,7 @@ Additional functionality contained by the pipeline currently includes: > [!WARNING] > The refine fdr feature will be evaluated on a large benchmark dataset in the following releases. > Consider it as an experimental feature. + #### Refine FDR - This application converts several OpenMS XML formats to mzTab. 
(`MzTabExporter`) From 0d0f46bd7d59994400dc951c1cedba53a3982ade Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 11:52:18 +0000 Subject: [PATCH 06/10] remove deeplc and ms2pip from test_full since test stops bc of runtime --- conf/test_full.config | 3 --- 1 file changed, 3 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index b5793cee..2aaa6e84 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,8 +21,5 @@ params { predict_class_1 = true predict_class_2 = true - use_deeplc = true - use_ms2pip = true - ms2pip_model_name = 'CID' annotate_ions = true } From 6d11bcc10ed0aa31c412899392385c8985511de1 Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 15:00:24 +0000 Subject: [PATCH 07/10] reduce test_full bc no more storage of device (github) --- conf/test_full.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/test_full.config b/conf/test_full.config index 2aaa6e84..657a7514 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,5 +21,4 @@ params { predict_class_1 = true predict_class_2 = true - annotate_ions = true } From db8ad1d0348afdc45ebc18b3256cae427bc6fa8c Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 16:57:44 +0000 Subject: [PATCH 08/10] remove class1 and class2 from test_full --- conf/test.config | 1 - conf/test_deeplc.config | 1 - conf/test_full.config | 6 +++--- conf/test_ionannotator.config | 1 - conf/test_ms2pip.config | 1 - 5 files changed, 3 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 44196d45..029cfbde 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,7 +22,6 @@ params { // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv' - allele_sheet = 
'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_allele_sheet.tsv' // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) skip_quantification = true diff --git a/conf/test_deeplc.config b/conf/test_deeplc.config index 687b0135..dff885b5 100644 --- a/conf/test_deeplc.config +++ b/conf/test_deeplc.config @@ -25,7 +25,6 @@ params { // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) skip_quantification = true - spectrum_batch_size = 5000 use_deeplc = true deeplc_add_abs_rt_error = true deeplc_add_sqr_rt_error = true diff --git a/conf/test_full.config b/conf/test_full.config index 657a7514..67355094 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,8 +17,8 @@ params { // Input data fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' - allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv' - predict_class_1 = true - predict_class_2 = true + use_deeplc = true + use_ms2pip = true + annotate_ions = true } diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config index 2e52c0b5..0de44b10 100644 --- a/conf/test_ionannotator.config +++ b/conf/test_ionannotator.config @@ -25,6 +25,5 @@ params { // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) skip_quantification = true - spectrum_batch_size = 5000 annotate_ions = true } diff --git a/conf/test_ms2pip.config b/conf/test_ms2pip.config index da3c23b5..6343b683 100644 --- a/conf/test_ms2pip.config +++ b/conf/test_ms2pip.config @@ -25,7 +25,6 @@ params { // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full) 
skip_quantification = true - spectrum_batch_size = 5000 use_ms2pip = true ms2pip_model_name = 'Immuno-HCD' } From 9f108574ba6b821f3e71bd3312043c33c7b543e6 Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Sun, 8 Oct 2023 20:33:06 +0000 Subject: [PATCH 09/10] only use quant in test_full due to space issue on gh --- conf/test_full.config | 3 --- 1 file changed, 3 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 67355094..17eef6c3 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,7 +18,4 @@ params { fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv' - use_deeplc = true - use_ms2pip = true - annotate_ions = true } From eb6d692f9e7311b8afe22667864474ff03fa2157 Mon Sep 17 00:00:00 2001 From: jonasscheid Date: Mon, 9 Oct 2023 10:48:59 +0000 Subject: [PATCH 10/10] feedback --- README.md | 11 ++++++----- conf/base.config | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a796059b..679be851 100644 --- a/README.md +++ b/README.md @@ -46,10 +46,11 @@ Each row represents a mass spectrometry run in one of the formats: raw, mzML, d Now, you can run the pipeline using: ```bash -nextflow run nf-core/mhcquant -profile \ - --input 'samples.tsv' \ - --fasta 'SWISSPROT_2020.fasta' \ - --outdir ./results +nextflow run nf-core/mhcquant \ + -profile \ + --input 'samples.tsv' \ + --fasta 'SWISSPROT_2020.fasta' \ + --outdir ./results ``` > [!NOTE] @@ -110,7 +111,7 @@ Additional functionality contained by the pipeline currently includes: - Peak intensity prediction (`MS2PIP`) > [!WARNING] -> The refine fdr feature will be evaluated on a large benchmark dataset in the following releases. +> The refine FDR feature will be evaluated on a large benchmark dataset in the following releases. > Consider it as an experimental feature.
#### Refine FDR diff --git a/conf/base.config b/conf/base.config index ae01d45b..4bdccc70 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,7 +57,7 @@ process { cache = false } withName:TDF2MZML { - cpus = { check_max( 1 , 'cpus' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 10.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } }