From 831d2e9e8285e90e267dec43772f56c0fc24ed1d Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:14:17 +0100 Subject: [PATCH 01/10] adapt citation --- CITATION.cff | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index aad72dc..10d2a47 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,12 +1,33 @@ cff-version: 1.2.0 -message: "If you use this software, please cite it as below." +message: If you use this software, please cite it as below. +title: Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR authors: -- family-names: Amstler - given-names: Stephan - orcid: https://orcid.org/0009-0007-7214-7481 -title: "Umi-pipeline-nf - A nextflow pipeline to analyse UMI-tagged amplicons from nanopore sequencing." -version: 0.2.0 -identifiers: - - type: doi - value: 10.1101/2024.03.01.582741 -url: https://github.com/genepi/umi-pipeline-nf \ No newline at end of file + - family: Amstler + given: Stephan + - family: Streiter + given: Gertraud + - family: Pfurtscheller + given: Cathrin + - family: Forer + given: Lukas + - family: Di Maio + given: Silvia + - family: Weissensteiner + given: Hansi + - family: Paulweber + given: Bernhard + - family: Schoenherr + given: Sebastian + - family: Kronenberg + given: Florian + - family: Coassin + given: Stefan +doi: 10.1101/2024.03.01.582741 +date-released: 2024-03-05 +license: Apache-2.0 +repository-code: https://github.com/genepi/umi-pipeline-nf +preferred-citation: | + Amstler, S., Streiter, G., Pfurtscheller, C., Forer, L., Di Maio, S., Weissensteiner, H., Paulweber, B., Schoenherr, S., Kronenberg, F., Coassin, S. (2024). Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR. bioRxiv, 2024.03.01.582741. + doi: 10.1101/2024.03.01.582741 +preferred-citation-repository: | + Amstler, S., Streiter, G., Pfurtscheller, C., Forer, L., Di Maio, S., Weissensteiner, H., Paulweber, B., Schoenherr, S., Kronenberg, F., Coassin, S. (2024). UMI Pipeline Nextflow Repository. https://github.com/genepi/umi-pipeline-nf From d49a5f1f52e4d3a35259cb79c6b421fb50f0a69f Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:15:07 +0100 Subject: [PATCH 02/10] remove repository citation --- CITATION.cff | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 10d2a47..86eaf21 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -28,6 +28,4 @@ license: Apache-2.0 repository-code: https://github.com/genepi/umi-pipeline-nf preferred-citation: | Amstler, S., Streiter, G., Pfurtscheller, C., Forer, L., Di Maio, S., Weissensteiner, H., Paulweber, B., Schoenherr, S., Kronenberg, F., Coassin, S. (2024). Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR. bioRxiv, 2024.03.01.582741. - doi: 10.1101/2024.03.01.582741 -preferred-citation-repository: | - Amstler, S., Streiter, G., Pfurtscheller, C., Forer, L., Di Maio, S., Weissensteiner, H., Paulweber, B., Schoenherr, S., Kronenberg, F., Coassin, S. (2024). UMI Pipeline Nextflow Repository. https://github.com/genepi/umi-pipeline-nf + doi: 10.1101/2024.03.01.582741 \ No newline at end of file From d97c06b769d1baf86eac45f60fc96b2681444cbb Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:19:04 +0100 Subject: [PATCH 03/10] adapt citation --- CITATION.cff | 84 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 86eaf21..7659aa4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,31 +1,57 @@ -cff-version: 1.2.0 -message: If you use this software, please cite it as below. -title: Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR +cff-version: "1.2.0" +message: "If you use this software, please cite it as below." +title: "Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR" authors: - - family: Amstler - given: Stephan - - family: Streiter - given: Gertraud - - family: Pfurtscheller - given: Cathrin - - family: Forer - given: Lukas - - family: Di Maio - given: Silvia - - family: Weissensteiner - given: Hansi - - family: Paulweber - given: Bernhard - - family: Schoenherr - given: Sebastian - - family: Kronenberg - given: Florian - - family: Coassin - given: Stefan -doi: 10.1101/2024.03.01.582741 -date-released: 2024-03-05 -license: Apache-2.0 -repository-code: https://github.com/genepi/umi-pipeline-nf + - family: "Amstler" + given: "Stephan" + - family: "Streiter" + given: "Gertraud" + - family: "Pfurtscheller" + given: "Cathrin" + - family: "Forer" + given: "Lukas" + - family: "Di Maio" + given: "Silvia" + - family: "Weissensteiner" + given: "Hansi" + - family: "Paulweber" + given: "Bernhard" + - family: "Schoenherr" + given: "Sebastian" + - family: "Kronenberg" + given: "Florian" + - family: "Coassin" + given: "Stefan" +doi: "10.1101/2024.03.01.582741" +date-released: "2024-03-05" +license: "Apache-2.0" +repository-code: "https://github.com/genepi/umi-pipeline-nf" preferred-citation: | - Amstler, S., Streiter, G., Pfurtscheller, C., Forer, L., Di Maio, S., Weissensteiner, H., Paulweber, B., Schoenherr, S., Kronenberg, F., Coassin, S. (2024). Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR. bioRxiv, 2024.03.01.582741. - doi: 10.1101/2024.03.01.582741 \ No newline at end of file + type: "article" + authors: + - family-names: "Amstler" + given-names: "S." + - family-names: "Streiter" + given-names: "G." + - family-names: "Pfurtscheller" + given-names: "C." + - family-names: "Forer" + given-names: "L." + - family-names: "Di Maio" + given-names: "S." + - family-names: "Weissensteiner" + given-names: "H." + - family-names: "Paulweber" + given-names: "B." + - family-names: "Schoenherr" + given-names: "S." + - family-names: "Kronenberg" + given-names: "F." + - family-names: "Coassin" + given-names: "S." + doi: "10.1101/2024.03.01.582741" + journal: "bioRxiv" + day: 5 + month: 3 + title: "Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR" + year: 2024 From 1436b848af12cf66262e7fdc20285c6e71b22614 Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:21:21 +0100 Subject: [PATCH 04/10] adapt citation --- CITATION.cff | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 7659aa4..31bd595 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,31 +2,31 @@ cff-version: "1.2.0" message: "If you use this software, please cite it as below." title: "Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR" authors: - - family: "Amstler" - given: "Stephan" - - family: "Streiter" - given: "Gertraud" - - family: "Pfurtscheller" - given: "Cathrin" - - family: "Forer" - given: "Lukas" - - family: "Di Maio" - given: "Silvia" - - family: "Weissensteiner" - given: "Hansi" - - family: "Paulweber" - given: "Bernhard" - - family: "Schoenherr" - given: "Sebastian" - - family: "Kronenberg" - given: "Florian" - - family: "Coassin" - given: "Stefan" + - family-names: "Amstler" + given-names: "Stephan" + - family-names: "Streiter" + given-names: "Gertraud" + - family-names: "Pfurtscheller" + given-names: "Cathrin" + - family-names: "Forer" + given-names: "Lukas" + - family-names: "Di Maio" + given-names: "Silvia" + - family-names: "Weissensteiner" + given-names: "Hansi" + - family-names: "Paulweber" + given-names: "Bernhard" + - family-names: "Schoenherr" + given-names: "Sebastian" + - family-names: "Kronenberg" + given-names: "Florian" + - family-names: "Coassin" + given-names: "Stefan" doi: "10.1101/2024.03.01.582741" date-released: "2024-03-05" license: "Apache-2.0" repository-code: "https://github.com/genepi/umi-pipeline-nf" -preferred-citation: | +preferred-citation: type: "article" authors: - family-names: "Amstler" From 9f36b15f7277a604bc033d349b92ae0f4dd3cfe1 Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:40:06 +0100 Subject: [PATCH 05/10] Adapt citation file --- CITATION.cff | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 31bd595..fffc8f8 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -30,25 +30,25 @@ preferred-citation: type: "article" authors: - family-names: "Amstler" - given-names: "S." + given-names: "S" - family-names: "Streiter" - given-names: "G." + given-names: "G" - family-names: "Pfurtscheller" - given-names: "C." + given-names: "C" - family-names: "Forer" - given-names: "L." + given-names: "L" - family-names: "Di Maio" - given-names: "S." + given-names: "S" - family-names: "Weissensteiner" - given-names: "H." + given-names: "H" - family-names: "Paulweber" - given-names: "B." + given-names: "B" - family-names: "Schoenherr" - given-names: "S." + given-names: "S" - family-names: "Kronenberg" - given-names: "F." + given-names: "F" - family-names: "Coassin" - given-names: "S." + given-names: "S" doi: "10.1101/2024.03.01.582741" journal: "bioRxiv" day: 5 From 845a98fae635b6b7ef3582a9c9cace411b44a563 Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 15:44:51 +0100 Subject: [PATCH 06/10] adapt citation --- CITATION.cff | 40 ++++++++++++++++++++-------------------- README.md | 6 ++++++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index fffc8f8..343c8a5 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -29,26 +29,26 @@ repository-code: "https://github.com/genepi/umi-pipeline-nf" preferred-citation: type: "article" authors: - - family-names: "Amstler" - given-names: "S" - - family-names: "Streiter" - given-names: "G" - - family-names: "Pfurtscheller" - given-names: "C" - - family-names: "Forer" - given-names: "L" - - family-names: "Di Maio" - given-names: "S" - - family-names: "Weissensteiner" - given-names: "H" - - family-names: "Paulweber" - given-names: "B" - - family-names: "Schoenherr" - given-names: "S" - - family-names: "Kronenberg" - given-names: "F" - - family-names: "Coassin" - given-names: "S" + - family-names: "Amstler" + given-names: "Stephan" + - family-names: "Streiter" + given-names: "Gertraud" + - family-names: "Pfurtscheller" + given-names: "Cathrin" + - family-names: "Forer" + given-names: "Lukas" + - family-names: "Di Maio" + given-names: "Silvia" + - family-names: "Weissensteiner" + given-names: "Hansi" + - family-names: "Paulweber" + given-names: "Bernhard" + - family-names: "Schoenherr" + given-names: "Sebastian" + - family-names: "Kronenberg" + given-names: "Florian" + - family-names: "Coassin" + given-names: "Stefan" doi: "10.1101/2024.03.01.582741" journal: "bioRxiv" day: 5 diff --git a/README.md b/README.md index 7d48cdd..f526e7d 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,12 @@ nextflow run genepi/umi-pipeline-nf -r v0.1.0 -profile test,docker nextflow run genepi/umi-pipeline-nf -r v0.1.0 -c -profile docker ``` +## Citation + +If you use the pipeline please cite our Paper: + +Amstler S, Streiter G, Pfurtscheller C, Forer L, Di Maio S, Weissensteiner H, Paulweber B, Schoenherr S, Kronenberg F, Coassin S. Nanopore sequencing with unique molecular identifiers enables accurate mutation analysis and haplotyping in the complex Lipoprotein(a) KIV-2 VNTR. bioRxiv. 2024. doi: 10.1101/2024.03.01.582741. + ### Credits From d3191d291f1690f348b1a5d43f85da11002eec61 Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 16:03:49 +0100 Subject: [PATCH 07/10] adapt config files --- config/custom.config | 28 +++++++++++++++++++---- env/Dockerfile | 4 +--- lib/processes/variant_calling/mutserve.nf | 2 +- nextflow.config | 3 ++- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/config/custom.config b/config/custom.config index 1b6d97a..44720a4 100644 --- a/config/custom.config +++ b/config/custom.config @@ -21,13 +21,31 @@ params { // adaptable parameters - output_format = "fastq" - filter_strategy_clusters = "quality" + //READ FILTERING + min_read_length = 0 + min_qscore = 10 - call_variants = true - variant_caller = "freebayes" + // SUBSAMPLING + subsampling = false + subsampling_seed = 11 + subsampling_readnumber = 100000 + + // VARIANT_CALLING + call_variants = false + variant_caller = "freebayes" - medaka_model = "r1041_e82_400bps_hac_g615" + // ADVANCED + min_reads_per_barcode = 1000 + umi_errors = 2 + max_dist_umi = 2 + min_reads_per_cluster = 20 + max_reads_per_cluster = 60 + min_consensus_quality = 40 + masking_strategy = "softmask" + filter_strategy_clusters = "quality" + min_overlap = 0.95 + balance_strands = true + medaka_model = "r1041_e82_400bps_hac_g615" } // NEXTFLOW REPORTING diff --git a/env/Dockerfile b/env/Dockerfile index 3af161b..94c72a5 100644 --- a/env/Dockerfile +++ b/env/Dockerfile @@ -18,8 +18,6 @@ RUN conda update -y conda && \ conda clean --all WORKDIR "/opt" -RUN wget https://github.com/seppinho/mutserve/releases/download/v2.0.0-rc15/mutserve.zip && \ - unzip mutserve.zip -ENV PATH="/opt/mutserve:${PATH}" +RUN wget https://github.com/seppinho/mutserve/releases/download/v2.0.0-rc13.lpa/mutserve_LPA_adapted.jar diff --git a/lib/processes/variant_calling/mutserve.nf b/lib/processes/variant_calling/mutserve.nf index 79979a3..a69384b 100644 --- a/lib/processes/variant_calling/mutserve.nf +++ b/lib/processes/variant_calling/mutserve.nf @@ -14,7 +14,7 @@ process MUTSERVE { script: """ - mutserve call \ + java -jar /opt/mutserve_LPA_adapted.jar call \ --output ${type}.vcf \ --write-raw \ --reference ${reference} \ diff --git a/nextflow.config b/nextflow.config index 60f1cfe..3200a9a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,7 +39,8 @@ params { // ADVANCED min_reads_per_barcode = 1000 - umi_errors = 3 + umi_errors = 2 + max_dist_umi = 2 min_reads_per_cluster = 20 max_reads_per_cluster = 60 min_consensus_quality = 40 From b5d5cbb5008802e266bf198f3767899e254fd2eb Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 16:04:00 +0100 Subject: [PATCH 08/10] adapt test.config --- config/test.config | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/config/test.config b/config/test.config index 281bf74..561685d 100644 --- a/config/test.config +++ b/config/test.config @@ -8,28 +8,20 @@ params { - help = false - version = false - debug = true + help = false + version = false + debug = true - input = "$baseDir/data/fastq_pass/" - output = "umi-pipeline-nf_test-run" - reference = "$baseDir/data/ref/lpa-ref2645.fasta" - reference_fai = "$baseDir/data/ref/lpa-ref2645.fasta.fai" - bed = "$baseDir/data/ref/lpa-ref2645.bed" - - subsampling = false + input = "$baseDir/data/fastq_pass/" + output = "umi-pipeline-nf_test-run" + reference = "$baseDir/data/ref/lpa-ref2645.fasta" + reference_fai = "$baseDir/data/ref/lpa-ref2645.fasta.fai" + bed = "$baseDir/data/ref/lpa-ref2645.bed" min_reads_per_cluster = 10 max_reads_per_cluster = 20 - - write_reports = true - output_format = "fastq" - filter_strategy_clusters = "quality" - call_variants = true - variant_caller = "freebayes" - - medaka_model = "r1041_e82_400bps_hac_g615" + call_variants = true + variant_caller = "mutserve" } // NEXTFLOW REPORTING From c12b48f0adfe757e3871aab85d9fd3691f111617 Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 16:11:02 +0100 Subject: [PATCH 09/10] reduce initial polish cluster resources but increase retry --- config/base.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/base.config b/config/base.config index 6da52d0..6a4280d 100644 --- a/config/base.config +++ b/config/base.config @@ -4,10 +4,10 @@ // PROCESS RESOURCES process { withName: "POLISH_CLUSTER" { - memory = { 10.GB * task.attempt } - cpus = 2 + memory = { 1.GB * task.attempt } + cpus = 1 } errorStrategy = 'retry' - maxRetries = 3 + maxRetries = 5 } From 25a239c1e0227c96e18a9158afb8d332d83a7daf Mon Sep 17 00:00:00 2001 From: AmstlerStephan Date: Wed, 6 Mar 2024 16:23:26 +0100 Subject: [PATCH 10/10] prepare release --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 3200a9a..a51c39d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,7 +1,7 @@ // NEXTFLOW MANIFEST manifest { name = 'umi-pipeline-nf' - version = '0.1.0' + version = '0.2.0' description = 'Nextflow pipeline to analyze umi amplicon data' author = 'Stephan Amstler' homePage = 'https://github.com/AmstlerStephan/umi-pipeline-nf.git' @@ -67,7 +67,7 @@ params { // Load base.config by default for all pipelines includeConfig "${baseDir}/config/base.config" -process.container = 'quay.io/genepi/umi-pipeline-nf:v0.1.0' +process.container = 'quay.io/genepi/umi-pipeline-nf:v0.2.0' profiles {