diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba81b83b..422860aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,6 +104,8 @@ jobs: "test_assembled_hs", "test_assembled_mm", "test_10x_sc", + "test_mixcr_bulk", + "test_mixcr_sc", "test_clontech_umi", "test_nebnext_umi", "test_rnaseq_bulk", @@ -135,3 +137,52 @@ jobs: /home/runner/work !/home/runner/work/conda !/home/runner/work/singularity + test_profile_download: + name: nf-test with profiles needing mixcr license + # Only run on PRs from branches of nf-core/airrflow repository or on push if this is the nf-core dev branch (merged PRs) + # (GitHub secrets are not accessible for workflows from forks!) + if: "${{ github.event.pull_request.head.repo.full_name == 'nf-core/airrflow' || (github.event_name == 'push' && github.repository == 'nf-core/airrflow') }}" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + profile: ["test_mixcr_bulk", "test_mixcr_sc"] + + steps: + - name: Check out pipeline code + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + + - name: Run nf-test + env: + MIXCR_LICENSE: ${{ secrets.MIXCR_LICENSE }} # exposed via env so the secret is never expanded into the script text + run: | + nextflow secrets set MIXCR_LICENSE "$MIXCR_LICENSE" + nf-test test --tag ${{ matrix.profile }} --profile ${{ matrix.profile }},docker --junitxml=test.xml --verbose + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml + + - name: Publish Test Report + uses: 
mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: "*.xml" diff --git a/conf/modules.config b/conf/modules.config index 649f859b..357a2167 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -731,4 +731,40 @@ process { ] } + // ------------------------------- + // MiXCR + // ------------------------------- + + withName: MIXCR_IND_POSTANALYSIS { + publishDir = [ + path: { "${params.outdir}/mixcr/individual_postanalysis" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MIXCR_IND_PLOTS { + publishDir = [ + path: { "${params.outdir}/mixcr/individual_postanalysis/plots" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MIXCR_OVERLAP_POSTANALYSIS { + publishDir = [ + path: { "${params.outdir}/mixcr/overlap_postanalysis" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MIXCR_OVERLAP_PLOTS { + publishDir = [ + path: { "${params.outdir}/mixcr/overlap_postanalysis/plots" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } diff --git a/conf/test_mixcr_bulk.config b/conf/test_mixcr_bulk.config new file mode 100644 index 00000000..6b35a757 --- /dev/null +++ b/conf/test_mixcr_bulk.config @@ -0,0 +1,40 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. Use as follows: + * nextflow run nf-core/airrflow -profile test_mixcr_bulk, + */ + +process { + resourceLimits = [ + cpus: 2, + memory: '6.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test MiXCR toolset on bulk data.' 
+ config_profile_description = 'Minimal test dataset to check pipeline functionality with raw bulk data using the MiXCR toolset' + + + // params + mode = 'fastq' + library_generation_method = 'mixcr' + kit = "irepertoire-human-rna-xcr-repseq-lr" + clonal_threshold = 0 + mixcr_postanalysis = true + mixcr_downsampling = "count-read-auto" + mixcr_weightfunction = "none" + mixcr_productive_only = true + mixcr_drop_outliers = true + mixcr_overlap_criteria = "CDR3|AA|V|J" + mixcr_diversity_plottype = "boxplot" + + + // Input data + input = pipelines_testdata_base_path + 'testdata-tcr/TCR_metadata_airr.tsv' + imgt_mixcr = "https://github.com/repseqio/library-imgt/releases/download/v8/imgt.202312-3.sv8.json.gz" +} diff --git a/conf/test_mixcr_sc.config b/conf/test_mixcr_sc.config new file mode 100644 index 00000000..3205e1db --- /dev/null +++ b/conf/test_mixcr_sc.config @@ -0,0 +1,42 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. Use as follows: + * nextflow run nf-core/airrflow -profile test_mixcr_sc, + */ + +includeConfig "https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-mixcr/10x_sc.conf" + +process { + resourceLimits = [ + cpus: 2, + memory: '6.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test MiXCR sc analysis.' 
+ config_profile_description = 'Minimal test dataset to check pipeline functionality with raw single cell data from 10xGenomics using the MiXCR toolset' + + // params + mode = 'fastq' + library_generation_method = 'mixcr' + clonal_threshold = 0 + kit = '10x-sc-xcr-vdj' + mixcr_postanalysis = true + mixcr_downsampling = "count-read-auto" + mixcr_weightfunction = "none" + mixcr_productive_only = true + mixcr_drop_outliers = true + mixcr_overlap_criteria = "CDR3|AA|V|J" + mixcr_diversity_plottype = "boxplot" + + + // Input data + input = pipelines_testdata_base_path + 'testdata-sc/10x_sc_raw.tsv' + reference_10x = pipelines_testdata_base_path + 'testdata-sc/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz' + imgt_mixcr = "https://github.com/repseqio/library-imgt/releases/download/v8/imgt.202312-3.sv8.json.gz" +} diff --git a/docs/images/metro-map-airrflow.pdf b/docs/images/metro-map-airrflow.pdf index b55e1477..a94a6acc 100644 Binary files a/docs/images/metro-map-airrflow.pdf and b/docs/images/metro-map-airrflow.pdf differ diff --git a/docs/images/metro-map-airrflow.png b/docs/images/metro-map-airrflow.png index ac4ee67e..377b31e8 100644 Binary files a/docs/images/metro-map-airrflow.png and b/docs/images/metro-map-airrflow.png differ diff --git a/docs/images/metro-map-airrflow.svg b/docs/images/metro-map-airrflow.svg new file mode 100644 index 00000000..a2699b30 --- /dev/null +++ b/docs/images/metro-map-airrflow.svgmmcantationingle cellunselected RNA-seqCellranger FastpTRUST4Align to DBAssemble AlignmentsExportMiXCR QCExport AIRRInidividual OverlapFastpvdjClonotypes Postanalysis Postanalysis + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +MiXCRRepertoire and Clonotype Analysis and ReportingPDF diff --git a/docs/usage.md b/docs/usage.md index 608767d0..05130411 100644 --- a/docs/usage.md +++ 
b/docs/usage.md @@ -7,6 +7,7 @@ ## Introduction The nf-core/airrflow pipeline allows processing BCR and TCR targeted sequencing data from bulk and single-cell sequencing protocols. It performs sequence assembly, V(D)J assignment, clonotyping, lineage reconsctruction and repertoire analysis using the [Immcantation](https://immcantation.readthedocs.io/en/stable/) framework. +TCR and BCR processing using the [MiXCR](https://mixcr.com/) toolset is also supported. ![nf-core/airrflow overview](images/airrflow_workflow_overview.png) @@ -444,13 +445,59 @@ The UMI barcodes are typically read from an index file but sometimes can be prov - No UMIs in R1 or R2 reads: if no UMIs are present in the samples, specify `--umi_length 0` to use the sans-UMI subworkflow. +## Bulk processing using the MiXCR toolset + +Processing of TCR and BCR repertoires can be performed using MiXCR. +When the `--library_generation_method mixcr` option is set, preprocessing (QC and sequence assembly) is performed using MiXCR instead of pRESTO. + +The resulting AIRR files are used as input for airrflow's _V(D)J annotation and filtering step_ and the remaining steps of the analysis are carried out by the Immcantation framework. Optionally, the user can decide to perform MiXCR's postprocessing as well by setting the `--mixcr_postanalysis` flag. + +Using the `--kit` option one can choose from the great variety of MiXCR's built-in [presets](https://mixcr.com/mixcr/reference/overview-built-in-presets/) containing all required options for many commercially available kits, data types and library preparation protocols. + +A typical command when using MiXCR may look like the following.
Here, data derived from iRepertoire's human RNA short read kit was analyzed: + +```bash +nextflow run nf-core/airrflow \ + -profile <docker/singularity/podman> \ + --mode fastq \ + --library_generation_method mixcr \ + --kit irepertoire-human-rna-xcr-repseq-lr \ + --input test_samplesheet.tsv \ + --outdir results \ + --clonal_threshold 0 \ + --imgt_mixcr imgt.202312-3.sv8.json.gz \ + --mixcr_postanalysis \ + --mixcr_downsampling count-read-auto \ + --mixcr_weightfunction none \ + --mixcr_productive_only \ + --mixcr_drop_outliers \ + --mixcr_overlap_criteria "CDR3|AA|V|J" \ + --mixcr_diversity_plottype boxplot +``` + +#### MiXCR IMGT reference + +MiXCR's IMGT database can be downloaded from [GitHub](https://github.com/repseqio/library-imgt/releases). + +#### MiXCR License + +MiXCR is a commercial tool for which a license key is required. +Academic users can obtain a free license key [here](https://licensing.milaboratories.com/). + +Before running the pipeline, you have to store the license key as a Nextflow secret: + +```bash +nextflow secrets set MIXCR_LICENSE "<your-license-key>" +``` + ## Supported single cell library generation methods (protocols) -When processing single cell sequencing data departing from raw `fastq` reads, currently only a `--library_generation_method` to support 10xGenomics data is available. +Raw single cell `fastq` reads from 10xGenomics can be processed with the 10xGenomics framework (`cellranger vdj`). Alternatively, MiXCR's single cell presets can be used. -| Library generation methods | Description | Name in pipeline | Commercial protocols | -| -------------------------- | ----------------------------------------------------------------------------------------------------------- | ---------------- | -------------------- | -| RT(RHP)+PCR | sequencing data produced from Chromium single cell 5'V(D)J libraries containing cellular barcodes and UMIs. 
| sc_10x_genomics | 10xGenomics | +| Library generation methods | Description | Name in pipeline | Commercial protocols | +| -------------------------- | --------------------------------------------------------------------------------------------------------------------- | ---------------- | -------------------- | +| RT(RHP)+PCR | sequencing data produced from Chromium single cell 5'V(D)J libraries containing cellular barcodes and UMIs. | sc_10x_genomics | 10xGenomics | +| | Different MiXCR single cell [presets](https://mixcr.com/mixcr/reference/overview-built-in-presets/#parse-biosciences) | mixcr | | ### 10xGenomics @@ -459,13 +506,13 @@ The `cellranger vdj` automatically uses the Chromium cellular barcodes and UMIs Examples are provided below to run airrflow to process 10xGenomics raw FASTQ data. ```bash -nextflow run nf-core/airrflow -r dev \ --profile \ ---mode fastq \ ---input input_samplesheet.tsv \ ---library_generation_method sc_10x_genomics \ ---reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \ ---outdir ./results +nextflow run nf-core/airrflow \ + -profile \ + --mode fastq \ + --input input_samplesheet.tsv \ + --library_generation_method sc_10x_genomics \ + --reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \ + --outdir ./results ``` #### 10xGenomics reference @@ -512,6 +559,42 @@ nextflow run nf-core/airrfow \ - If UMI's are present, the read containing them must be specified using the `--umi_read` parameter. - The `--read_format` parameter can be used to specify the Cell Barcode and UMI position within the reads (see TRUST4 [docs](https://github.com/liulab-dfci/TRUST4?tab=readme-ov-file#10x-genomics-data-and-barcode-based-single-cell-data)). For scRNAseq with 10X Genomics the R1 read usually contains both the cell barcode (barcode) and UMI. 
So we specify "R1" for both `--umi_read` and `--cell_barcode_read`, and the positions of both the cell barcode and UMI with the `--read_format` parameter as in the example ("bc:0:15,um:16:27"). Then specify the R1 read in the filename_R1 column of the samplesheet, and the read containing the actual sequence (usually R2) in the filename_R2 column of the samplesheet. +### MiXCR + +When using MiXCR to analyze single cell data, refer to MiXCR's [presets](https://mixcr.com/mixcr/reference/overview-built-in-presets/#parse-biosciences) to select the right one. The preset can be set in airrflow using the `--kit` option. Additional flags can be set in a config file selected using `-c`.\ +A typical command could look as follows: + +```bash +nextflow run nf-core/airrflow \ + -profile <docker/singularity/podman> \ + -c 10x_sc.conf \ + --mode fastq \ + --library_generation_method mixcr \ + --reference_10x vdj_IMGT_human \ + --kit 10x-sc-xcr-vdj \ + --input samplesheet_sc.tsv \ + --outdir test_sc_out \ + --imgt_mixcr imgt.202312-3.sv8.json.gz \ + --clonal_threshold 0 \ + --mixcr_postanalysis \ + --mixcr_downsampling count-read-auto \ + --mixcr_weightfunction none \ + --mixcr_productive_only \ + --mixcr_drop_outliers \ + --mixcr_overlap_criteria "CDR3|AA|V|J" \ + --mixcr_diversity_plottype boxplot +``` + +An additional config file (`10x_sc.conf` in the command above) is required to run the pipeline with this preset: + +```groovy +process { + withName: MIXCR_MIXCR { + ext.args = "--species hs" + } +} +``` + ## Core Nextflow arguments :::note diff --git a/modules/local/mixcr/mixcr.nf b/modules/local/mixcr/mixcr.nf new file mode 100644 index 00000000..bd08a511 --- /dev/null +++ b/modules/local/mixcr/mixcr.nf @@ -0,0 +1,57 @@ +process MIXCR_MIXCR { + tag "$meta.id" + label 'process_high' + + secret 'MIXCR_LICENSE' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + tuple val(meta), path(reads) + path(imgt_json) // imgt db + val(kit) + + output: + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.clns') , emit: clns + tuple val(meta), path('*.vdjca') , emit: vdjca + tuple val(meta), path('*.txt') , emit: txt + tuple val(meta), path('*.tsv') , emit: tsv + tuple val(meta), path('*clones*.tsv') , emit: clones_tsv + tuple val(meta), path('*') , emit: outs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr analyze ${kit} \\ + --library imgt \\ + ${reads} \\ + ${prefix} \\ + -t ${task.cpus} \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_exportairr.nf b/modules/local/mixcr/mixcr_exportairr.nf new file mode 100644 index 00000000..2c21cbe9 --- /dev/null +++ b/modules/local/mixcr/mixcr_exportairr.nf @@ -0,0 +1,53 @@ +process MIXCR_MIXCREXPORTAIRR { + tag "$meta.id" + label 'process_single' + + secret 'MIXCR_LICENSE' + + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. 
+ // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + + tuple val(meta), path(clns) + path(imgt_json) // imgt db + + output: + tuple val(meta), path("*.airr.tsv"), emit: mixcr_airr + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR_exportairr module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def clns_file = clns instanceof List ? clns.first() : clns // for sc input 2 clns are provided which create the same airr file, so take the first one; a single file arrives as a plain path, not a list. + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportAirr \\ + ${clns_file} \\ + ${prefix}.airr.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcrexportairr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_individualpostanalysis.nf b/modules/local/mixcr/mixcr_individualpostanalysis.nf new file mode 100644 index 00000000..575d4931 --- /dev/null +++ b/modules/local/mixcr/mixcr_individualpostanalysis.nf @@ -0,0 +1,61 @@ +process MIXCR_IND_POSTANALYSIS { + tag "$meta.id" + label 'process_medium' + + secret 'MIXCR_LICENSE' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + tuple val(meta), path(clns) + val(downsampling) + val(weight_function) + val(productive) + val(drop_outliers) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path('*') , emit: outs + tuple val(meta), path('*.json') , emit: mixcr_ind_json + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def productive_only = productive ? '--only-productive' : '' + def drop_outliers_flag = drop_outliers ? '--drop-outliers' : '' // distinct name: must not redeclare the `drop_outliers` input + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr postanalysis individual \\ + --default-downsampling ${downsampling} \\ + --default-weight-function ${weight_function} \\ + ${productive_only} \\ + ${drop_outliers_flag} \\ + ${clns} \\ + ${prefix}.individual_postanalysis.json \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_individualpostanalysis_plots.nf b/modules/local/mixcr/mixcr_individualpostanalysis_plots.nf new file mode 100644 index 00000000..4d2674f5 --- /dev/null +++ b/modules/local/mixcr/mixcr_individualpostanalysis_plots.nf @@ -0,0 +1,113 @@ +process MIXCR_IND_PLOTS { + tag "$meta.id" + label 'process_medium' + + secret 'MIXCR_LICENSE' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + tuple val(meta), path(mixcr_ind_json) + val(diversity_plottype) + val(statistical_method) + val(p_adjust_method) + + output: + tuple val(meta), path('*') , emit: outs + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + # individual diversity plots + mixcr exportPlots diversity \\ + --plot-type ${diversity_plottype} \\ + --method ${statistical_method} \\ + --p-adjust-method ${p_adjust_method} \\ + ${mixcr_ind_json} \\ + ${prefix}.diversity.pdf + + # individual cdr3 plots + mixcr exportPlots cdr3metrics \\ + ${mixcr_ind_json} \\ + --method ${statistical_method} \\ + --p-adjust-method ${p_adjust_method} \\ + ${prefix}.cdr3.pdf + + # V usage + mixcr exportPlots vUsage \\ + ${mixcr_ind_json} \\ + ${prefix}.V_usage_heatmap.pdf + + mixcr exportPlots vUsage \\ + --bar-plot \\ + ${mixcr_ind_json} \\ + ${prefix}.V_usage_barplot.pdf + + mixcr exportPlots vUsage \\ + --bar-plot-by-sample \\ + ${mixcr_ind_json} \\ + ${prefix}.V_usage_barplot_by_sample.pdf + + # J usage + mixcr exportPlots jUsage \\ + ${mixcr_ind_json} \\ + ${prefix}.J_usage_heatmap.pdf + + mixcr exportPlots jUsage \\ + --bar-plot \\ + ${mixcr_ind_json} \\ + ${prefix}.J_usage_barplot.pdf + + mixcr exportPlots jUsage \\ + --bar-plot-by-sample \\ + ${mixcr_ind_json} \\ + ${prefix}.J_usage_barplot_by_sample.pdf + + # VJ usage + mixcr exportPlots vjUsage \\ + 
+ ${mixcr_ind_json} \\ + ${prefix}.VJ_usage_heatmap.pdf + + # Isotype usage + mixcr exportPlots isotypeUsage \\ + ${mixcr_ind_json} \\ + ${prefix}.isotype_usage_heatmap.pdf + + mixcr exportPlots isotypeUsage \\ + --bar-plot \\ + ${mixcr_ind_json} \\ + ${prefix}.isotype_usage_barplot.pdf + + mixcr exportPlots isotypeUsage \\ + --bar-plot-by-sample \\ + ${mixcr_ind_json} \\ + ${prefix}.isotype_usage_barplot_by_sample.pdf + + + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_overlappostanalysis.nf b/modules/local/mixcr/mixcr_overlappostanalysis.nf new file mode 100644 index 00000000..af17c91a --- /dev/null +++ b/modules/local/mixcr/mixcr_overlappostanalysis.nf @@ -0,0 +1,64 @@ +process MIXCR_OVERLAP_POSTANALYSIS { + tag "$meta.id" + label 'process_medium' + + secret 'MIXCR_LICENSE' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + tuple val(meta), path(clns) + val(downsampling) + val(weight_function) + val(productive) + val(drop_outliers) + val(criteria) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path('*') , emit: outs + tuple val(meta), path('*.json') , emit: mixcr_overlap_json + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def productive_only = productive ? '--only-productive' : '' + def drop_outliers_flag = drop_outliers ? '--drop-outliers' : '' // distinct name: must not redeclare the `drop_outliers` input + def criteria_pick = criteria ? 
"--criteria '${criteria}'" : '' + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr postanalysis overlap \\ + --default-downsampling ${downsampling} \\ + --default-weight-function ${weight_function} \\ + ${productive_only} \\ + ${drop_outliers_flag} \\ + ${criteria_pick} \\ + ${clns} \\ + ${prefix}.overlap_postanalysis.json \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_overlappostanalysis_plots.nf b/modules/local/mixcr/mixcr_overlappostanalysis_plots.nf new file mode 100644 index 00000000..3e3fef4d --- /dev/null +++ b/modules/local/mixcr/mixcr_overlappostanalysis_plots.nf @@ -0,0 +1,49 @@ +process MIXCR_OVERLAP_PLOTS { + tag "$meta.id" + label 'process_medium' + + secret 'MIXCR_LICENSE' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + tuple val(meta), path(mixcr_overlap_json) + + output: + tuple val(meta), path('*') , emit: outs + + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportPlots overlap \\ + ${mixcr_overlap_json} \\ + ${prefix}.overlap.pdf + + + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_qc_align.nf b/modules/local/mixcr/mixcr_qc_align.nf new file mode 100644 index 00000000..147549d4 --- /dev/null +++ b/modules/local/mixcr/mixcr_qc_align.nf @@ -0,0 +1,53 @@ +process MIXCR_MIXCRQCALIGN { + tag "$meta.id" + label 'process_single' + + secret 'MIXCR_LICENSE' + + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + + tuple val(meta), path(clns) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path("*.align_qc.pdf"), emit: align_qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR_exportairr module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportQc align \\ + ${clns} \\ + ${prefix}.align_qc.pdf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcrexportairr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_qc_chainusage.nf b/modules/local/mixcr/mixcr_qc_chainusage.nf new file mode 100644 index 00000000..01c7a02b --- /dev/null +++ b/modules/local/mixcr/mixcr_qc_chainusage.nf @@ -0,0 +1,53 @@ +process MIXCR_MIXCRQCCHAINUSAGE { + tag "$meta.id" + label 'process_single' + + secret 'MIXCR_LICENSE' + + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + + tuple val(meta), path(clns) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path("*.chainusage_qc.pdf"), emit: chainusage_qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MiXCR_exportairr module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportQc chainUsage \\ + ${clns} \\ + ${prefix}.chainusage_qc.pdf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcrexportairr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_qc_coverage.nf b/modules/local/mixcr/mixcr_qc_coverage.nf new file mode 100644 index 00000000..0bcf33cc --- /dev/null +++ b/modules/local/mixcr/mixcr_qc_coverage.nf @@ -0,0 +1,53 @@ +process MIXCR_MIXCRQCCOVERAGE { + tag "$meta.id" + label 'process_single' + + secret 'MIXCR_LICENSE' + + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + + tuple val(meta), path(vdjca) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path("*.coverage_qc.pdf"), emit: coverage_qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MIXCR_MIXCRQCCOVERAGE module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportQc coverage \\ + ${vdjca} \\ + ${prefix}.coverage_qc.pdf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcrexportairr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/modules/local/mixcr/mixcr_qc_tags.nf b/modules/local/mixcr/mixcr_qc_tags.nf new file mode 100644 index 00000000..18649003 --- /dev/null +++ b/modules/local/mixcr/mixcr_qc_tags.nf @@ -0,0 +1,53 @@ +process MIXCR_MIXCRQCTAGS { + tag "$meta.id" + label 'process_single' + + secret 'MIXCR_LICENSE' + + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0': + 'ghcr.io/milaboratory/mixcr/mixcr:4.6.0' }" + + input: + + tuple val(meta), path(clns) + path(imgt_json) // imgt db + + + output: + tuple val(meta), path("*.tags_qc.pdf"), emit: tags_qc + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MIXCR_MIXCRQCTAGS module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # activate license + if [ \${MIXCR_LICENSE:-"unset"} != "unset" ]; then + echo "Initializing MIXCR_LICENSE env variable" + export MI_LICENSE=\$MIXCR_LICENSE + fi + + mixcr exportQc tags \\ + ${clns} \\ + ${prefix}.tags_qc.pdf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mixcrexportairr: \$(mixcr --version |& sed '1!d ; s/mixcr //') + END_VERSIONS + """ +} diff --git a/nextflow.config b/nextflow.config index a0a6da09..af169c09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,7 @@ params { index_file = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/' + // ---------------------------- // sequencing protocol options // ---------------------------- @@ -134,6 +135,20 @@ params { read_format = null umi_read = null + // ----------------------- + // MiXCR options + // ----------------------- + imgt_mixcr = null + kit = null + mixcr_postanalysis = false + mixcr_downsampling = null + mixcr_weightfunction = null + mixcr_productive_only = true + mixcr_drop_outliers = false + mixcr_overlap_criteria = "CDR3|AA|V|J" + mixcr_diversity_plottype = "boxplot" + mixcr_statistical_method = "Wilcoxon" + mixcr_p_adjust_method = "Holm" // ----------------------- // generic nf-core options @@ -230,6 +245,8 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + singularity.runOptions = '-B $HOME' // bind-mount the user's home directory into the container + } podman { podman.enabled = true @@ -293,6 +310,8 @@ profiles { test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_10x_sc { includeConfig 'conf/test_10x_sc.config' } + test_mixcr_bulk { includeConfig 'conf/test_mixcr_bulk.config' } + test_mixcr_sc { includeConfig 'conf/test_mixcr_sc.config' } test_clontech_umi { includeConfig 
'conf/test_clontech_umi.config' } 
test_maskprimers_extract { includeConfig 'conf/test_maskprimers_extract.config' } test_maskprimers_align {includeConfig 'conf/test_maskprimers_align.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 334ce156..6fb550fe 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -67,7 +67,8 @@ "dt_5p_race", "dt_5p_race_umi", "sc_10x_genomics", - "trust4" + "trust4", + "mixcr" ], "help_text": "Available protocols are:\n- `specific_pcr_umi`: RT-PCR using transcript-specific primers containing UMIs.\n- `specific_pcr`: RT-PCR using transcript-specific primers.\n- `dt_5p_race_umi`: 5\u2019-RACE PCR using oligo-dT primers and template switch primers containing UMI.\n- `dt_5p_race`: 5\u2019-RACE PCR (i.e. RT is followed by a template switch (TS) step) using oligo-dT primers.\n- `sc_10x_genomics`:10x genomics library preparation protocol for scVDJ sequencing." }, @@ -549,6 +550,103 @@ "help_text": "Options for running raw RNA seq data.", "fa_icon": "fab fa-pagelines" }, + + "mixcr_options": { + "title": "MiXCR options", + "type": "object", + "description": "Options for the MiXCR VDJ assignment and postprocessing processes.", + "default": "", + "properties": { + "imgt_mixcr": { + "type": "string", + "description": "MiXCRs IMGT DB can be downloaded from the [GitHub](https://github.com/repseqio/library-imgt/releases).", + "fa_icon": "fas fa-database" + }, + "kit": { + "type": "string", + "description": "MiXCR preset", + "help_text": "MiXCRs built-in presets can be found [here](https://mixcr.com/mixcr/reference/overview-built-in-presets/)", + "fa_icon": "fas fa-toolbox" + }, + "mixcr_postanalysis": { + "type": "boolean", + "description": "Run MiXCR post analysis.", + "fa_icon": "fas fa-splotch" + }, + "mixcr_downsampling": { + "type": "string", + "description": "Choose downsampling method.", + "help_text": "Default downsampling method to normalize the cloneset. 
See [MiXCR docs](https://mixcr.com/mixcr/reference/mixcr-postanalysis/#downsampling) for details.", + "fa_icon": "fas fa-chart-simple" + }, + "mixcr_weightfunction": { + "type": "string", + "description": "Choose weight function.", + "help_text": "Default clonotype weight function. See [MiXCR docs](https://mixcr.com/mixcr/reference/mixcr-postanalysis/#weight-functions) for details.", + "fa_icon": "fas fa-weight-scale" + }, + "mixcr_productive_only": { + "type": "boolean", + "default": true, + "description": "Filter out non-productive sequences for individual postprocessing.", + "fa_icon": "fas fa-check" + }, + "mixcr_drop_outliers": { + "type": "boolean", + "default": false, + "description": "Drop outliers below downsampling value as defined in `mixcr_downsampling` during individual postprocessing.", + "fa_icon": "fas fa-splotch" + }, + "mixcr_overlap_criteria": { + "type": "string", + "default": "CDR3|AA|V|J", + "description": "Defines the rules to treat clones as equal.", + "help_text": "Defines the rules to treat clones as equal. Allows to specify gene feature for overlap (nt or AA) and optionally use V/J hits. Default: Overlap by AA CDR3 and V and J. 
See [docs](https://mixcr.com/mixcr/reference/mixcr-postanalysis/#command-line-options_1) for help.", + "fa_icon": "fas fa-splotch" + }, + "mixcr_diversity_plottype": { + "type": "string", + "default": "boxplot", + "description": "Plot type to plot diversity in.", + "enum": [ + "boxplot", + "boxplot-bindot", + "boxplot-jitter", + "violin", + "violin-bindot", + "barplot", + "barplot-stacked", + "lineplot", + "lineplot-jitter", + "lineplot-bindot", + "scatter" + ], + "fa_icon": "fas fa-chart-simple" + }, + "mixcr_statistical_method": { + "type": "string", + "default": "Wilcoxon", + "description": "Statistical test method used when plotting diversity and cdr3 metrics.", + "enum": ["Wilcoxon", "ANOVA", "TTest", "KruskalWallis", "KolmogorovSmirnov"], + "fa_icon": "fas fa-chart-simple" + }, + "mixcr_p_adjust_method": { + "type": "string", + "default": "Holm", + "description": "Method to adjust p-values when plotting diversity and cdr3 metrics.", + "enum": [ + "Holm", + "BenjaminHochberg", + "BenjaminYekutieli", + "Bonferroni", + "Hochberg", + "Hommel", + "none" + ], + "fa_icon": "fas fa-chart-simple" + } + } + }, "report_options": { "title": "Report options", "type": "object", @@ -794,10 +892,7 @@ "$ref": "#/$defs/single_cell_analysis_options" }, { - "$ref": "#/$defs/rnaseq_based_analysis_options" - }, - { - "$ref": "#/$defs/report_options" + "$ref": "#/$defs/reference_genome_options" }, { "$ref": "#/$defs/protocol" @@ -809,22 +904,10 @@ "$ref": "#/$defs/umi_barcode_handling" }, { - "$ref": "#/$defs/adapter_trimming" - }, - { - "$ref": "#/$defs/sequence_assembly_options" - }, - { - "$ref": "#/$defs/vdj_annotation_options" - }, - { - "$ref": "#/$defs/bulk_filtering_options" - }, - { - "$ref": "#/$defs/clonal_analysis_options" + "$ref": "#/$defs/generic_options" }, { - "$ref": "#/$defs/single_cell_analysis_options" + "$ref": "#/$defs/mixcr_options" }, { "$ref": "#/$defs/rnaseq_based_analysis_options" @@ -832,14 +915,8 @@ { "$ref": "#/$defs/report_options" }, - { - "$ref": 
"#/$defs/reference_genome_options" - }, { "$ref": "#/$defs/institutional_config_options" - }, - { - "$ref": "#/$defs/generic_options" } ] } diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf index 68237551..3f41af1a 100644 --- a/subworkflows/local/clonal_analysis.nf +++ b/subworkflows/local/clonal_analysis.nf @@ -73,6 +73,8 @@ workflow CLONAL_ANALYSIS { .map{ get_meta_tabs(it) } .set{ ch_define_clones } + // ch_define_clones.view() + DEFINE_CLONES_COMPUTE( ch_define_clones, clone_threshold.collect(), diff --git a/subworkflows/local/mixcr_flow.nf b/subworkflows/local/mixcr_flow.nf new file mode 100644 index 00000000..c37bf666 --- /dev/null +++ b/subworkflows/local/mixcr_flow.nf @@ -0,0 +1,93 @@ +include { FETCH_DATABASES } from '../../modules/local/fetch_databases' +include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' +include { MIXCR_MIXCR } from '../../modules/local/mixcr/mixcr' +include { MIXCR_MIXCREXPORTAIRR } from '../../modules/local/mixcr/mixcr_exportairr' +include { MIXCR_MIXCRQCALIGN } from '../../modules/local/mixcr/mixcr_qc_align' +include { MIXCR_MIXCRQCCOVERAGE } from '../../modules/local/mixcr/mixcr_qc_coverage' +include { MIXCR_MIXCRQCTAGS } from '../../modules/local/mixcr/mixcr_qc_tags' +include { MIXCR_MIXCRQCCHAINUSAGE } from '../../modules/local/mixcr/mixcr_qc_chainusage' +include { FASTQ_INPUT_CHECK } from '../../subworkflows/local/fastq_input_check' +include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../../modules/local/changeo/changeo_convertdb_fasta' + + +workflow MIXCR_FLOW { + + take: + ch_input + + main: + + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + // + // read in samplesheet, validate and stage input fies + // + FASTQ_INPUT_CHECK( + ch_input + ) + ch_versions = ch_versions.mix(FASTQ_INPUT_CHECK.out.versions) + + ch_reads = FASTQ_INPUT_CHECK.out.reads + + 
MIXCR_MIXCR ( + ch_reads, + file(params.imgt_mixcr), + params.kit + ) + ch_versions = ch_versions.mix(MIXCR_MIXCR.out.versions.first()) + + ch_mixcr_out = MIXCR_MIXCR.out.outs + + + MIXCR_MIXCREXPORTAIRR ( + MIXCR_MIXCR.out.clns, + file(params.imgt_mixcr) // it doesn't directly use the imgt db, but it needs it in the right directory anyway + ) + ch_versions = ch_versions.mix(MIXCR_MIXCREXPORTAIRR.out.versions.first()) + + ch_mixcr_airr = MIXCR_MIXCREXPORTAIRR.out.mixcr_airr + + // QC + MIXCR_MIXCRQCALIGN ( + MIXCR_MIXCR.out.clns, + file(params.imgt_mixcr) + ) + ch_versions = ch_versions.mix(MIXCR_MIXCRQCALIGN.out.versions.first()) + + MIXCR_MIXCRQCCOVERAGE ( + MIXCR_MIXCR.out.vdjca, + file(params.imgt_mixcr) + ) + ch_versions = ch_versions.mix(MIXCR_MIXCRQCCOVERAGE.out.versions.first()) + + MIXCR_MIXCRQCCHAINUSAGE ( + MIXCR_MIXCR.out.clns, + file(params.imgt_mixcr) + ) + ch_versions = ch_versions.mix(MIXCR_MIXCRQCCHAINUSAGE.out.versions.first()) + + + // convert airr tsv to fasta + CHANGEO_CONVERTDB_FASTA_FROM_AIRR( + MIXCR_MIXCREXPORTAIRR.out.mixcr_airr + ) + // mix rather than reassign so the versions collected above are kept + ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions) + + ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta + + emit: + versions = ch_versions + outs = ch_mixcr_out + // mixcr output in airr format + airr = ch_mixcr_airr + // mixcr output in clns format + clns = MIXCR_MIXCR.out.clns + // mixcr output converted to FASTA format + fasta = ch_fasta + samplesheet = FASTQ_INPUT_CHECK.out.samplesheet + +} + + diff --git a/subworkflows/local/mixcr_postanalysis.nf b/subworkflows/local/mixcr_postanalysis.nf new file mode 100644 index 00000000..1413dfd5 --- /dev/null +++ b/subworkflows/local/mixcr_postanalysis.nf @@ -0,0 +1,85 @@ +include { MIXCR_IND_POSTANALYSIS } from '../../modules/local/mixcr/mixcr_individualpostanalysis' +include { MIXCR_IND_PLOTS } from 
'../../modules/local/mixcr/mixcr_individualpostanalysis_plots' +include { MIXCR_OVERLAP_POSTANALYSIS } from 
'../../modules/local/mixcr/mixcr_overlappostanalysis' +include { MIXCR_OVERLAP_PLOTS } from '../../modules/local/mixcr/mixcr_overlappostanalysis_plots' + + +workflow MIXCR_POSTANALYSIS { + + take: + ch_mixcr_clns + + main: + + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + + ch_mixcr_clns.map{ it -> [ it[0].subject_id, + it[0].id, + it[0].species, + it[0].single_cell, + it[0].locus, + it[1] ] } + .groupTuple() + .map{ get_meta_tabs(it) } + .set { ch_clns_per_subject } + + + + MIXCR_IND_POSTANALYSIS ( + ch_clns_per_subject, + params.mixcr_downsampling, + params.mixcr_weightfunction, + params.mixcr_productive_only, + params.mixcr_drop_outliers, + file(params.imgt_mixcr) + ) + + ch_mixcr_ind_json = MIXCR_IND_POSTANALYSIS.out.mixcr_ind_json + + MIXCR_IND_PLOTS ( + ch_mixcr_ind_json, + params.mixcr_diversity_plottype, + params.mixcr_statistical_method, + params.mixcr_p_adjust_method + ) + + MIXCR_OVERLAP_POSTANALYSIS ( + ch_clns_per_subject, + params.mixcr_downsampling, + params.mixcr_weightfunction, + params.mixcr_productive_only, + params.mixcr_drop_outliers, + params.mixcr_overlap_criteria, + file(params.imgt_mixcr) + ) + + ch_mixcr_overlap_json = MIXCR_OVERLAP_POSTANALYSIS.out.mixcr_overlap_json + + ch_mixcr_overlap_json.view() + + MIXCR_OVERLAP_PLOTS ( + ch_mixcr_overlap_json + ) + + + +} + + +// Build [ meta, clns files ] for one subject from a groupTuple() result keyed on subject_id +def get_meta_tabs(arr) { + def meta = [:] + meta.id = [arr[0]].unique().join("") + meta.sample_ids = arr[1] + meta.species = arr[2] + meta.single_cell = arr[3].unique().join("") + meta.locus = arr[4].unique().join("") + + def array = [] + + array = [ meta, arr[5].flatten() ] + + return array +} diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf index 905c2d85..e413f1a4 100644 --- a/subworkflows/local/repertoire_analysis_reporting.nf +++ 
b/subworkflows/local/repertoire_analysis_reporting.nf @@ -30,7 +30,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { main: ch_versions = 
Channel.empty() - if (params.mode == "fastq" && params.library_generation_method != "sc_10x_genomics" && params.library_generation_method != "trust4" ) { + if (params.mode == "fastq" && params.library_generation_method != "sc_10x_genomics" && params.library_generation_method != "mixcr" && params.library_generation_method != "trust4" ) { PARSE_LOGS( ch_presto_filterseq_logs, ch_presto_maskprimers_logs, diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf index 735a8c10..79645bf4 100644 --- a/subworkflows/local/sc_raw_input.nf +++ b/subworkflows/local/sc_raw_input.nf @@ -4,6 +4,8 @@ include { RENAME_FILE as RENAME_FILE_TSV } from ' include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../../modules/local/changeo/changeo_convertdb_fasta' include { FASTQ_INPUT_CHECK } from '../../subworkflows/local/fastq_input_check' +include { MIXCR_FLOW } from './mixcr_flow' + workflow SC_RAW_INPUT { @@ -75,7 +77,15 @@ workflow SC_RAW_INPUT { ) .set { ch_renamed_tsv } - // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information) + if (params.kit) { + MIXCR_FLOW( + ch_reads_single + ) + ch_versions = ch_versions.mix(MIXCR_FLOW.out.versions) + } + + + // convert airr tsv to fasta CHANGEO_CONVERTDB_FASTA_FROM_AIRR( RENAME_FILE_TSV.out.file ) @@ -84,9 +94,6 @@ workflow SC_RAW_INPUT { ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta - // TODO: here you can add support for MiXCR sc protocols. 
- - emit: versions = ch_versions // complete cellranger output diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index 93bda121..11d7c0db 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -42,6 +42,8 @@ include { SINGLE_CELL_QC_AND_FILTERING } from '../subworkflows/local/single_cel include { CLONAL_ANALYSIS } from '../subworkflows/local/clonal_analysis' include { REPERTOIRE_ANALYSIS_REPORTING } from '../subworkflows/local/repertoire_analysis_reporting' include { SC_RAW_INPUT } from '../subworkflows/local/sc_raw_input' +include { MIXCR_FLOW } from '../subworkflows/local/mixcr_flow' +include { MIXCR_POSTANALYSIS } from '../subworkflows/local/mixcr_postanalysis' include { FASTQ_INPUT_CHECK } from '../subworkflows/local/fastq_input_check' include { RNASEQ_INPUT } from '../subworkflows/local/rnaseq_input' @@ -81,7 +83,7 @@ workflow AIRRFLOW { if ( params.mode == "fastq" ) { - // SC:Perform sequence assembly if input type is fastq from single-cell sequencing data (currently only 10XGenomics) + // SC: Perform sequence assembly if input type is fastq from single-cell sequencing data (currently only 10XGenomics) if (params.library_generation_method == "sc_10x_genomics") { SC_RAW_INPUT( @@ -108,6 +110,7 @@ workflow AIRRFLOW { ch_fastp_json = Channel.empty() ch_fastqc_postassembly_mqc = Channel.empty() + } else if (params.library_generation_method == "trust4") { // Extract VDJ sequences from "general" RNA seq data using TRUST4 @@ -134,7 +137,36 @@ workflow AIRRFLOW { ch_fastp_json = RNASEQ_INPUT.out.fastp_reads_json ch_fastqc_postassembly_mqc = Channel.empty() } - else { + else if (params.library_generation_method == "mixcr") { + + if (!params.kit) { + error "Kit parameter is required for MiXCR analysis." 
+ } + MIXCR_FLOW(ch_input) + + ch_fasta = MIXCR_FLOW.out.fasta + ch_versions = ch_versions.mix(MIXCR_FLOW.out.versions) + ch_mixcr_airr = MIXCR_FLOW.out.airr + ch_mixcr_clns = MIXCR_FLOW.out.clns + ch_mixcr_out = MIXCR_FLOW.out.outs + + ch_validated_samplesheet = MIXCR_FLOW.out.samplesheet.collect() + + ch_presto_filterseq_logs = Channel.empty() + ch_presto_maskprimers_logs = Channel.empty() + ch_presto_pairseq_logs = Channel.empty() + ch_presto_clustersets_logs = Channel.empty() + ch_presto_buildconsensus_logs = Channel.empty() + ch_presto_postconsensus_pairseq_logs = Channel.empty() + ch_presto_assemblepairs_logs = Channel.empty() + ch_presto_collapseseq_logs = Channel.empty() + ch_presto_splitseq_logs = Channel.empty() + ch_fastp_html = Channel.empty() + ch_fastp_json = Channel.empty() + ch_fastqc_postassembly_mqc = Channel.empty() + } + else { // Perform sequence assembly if input type is fastq from bulk sequencing data SEQUENCE_ASSEMBLY( ch_input, @@ -275,9 +307,14 @@ workflow AIRRFLOW { } ch_versions = ch_versions.mix( REPERTOIRE_ANALYSIS_REPORTING.out.versions ) - // + // MiXCR postanalysis (ch_mixcr_clns is only assigned when MIXCR_FLOW ran, i.e. fastq mode with the mixcr protocol) + if (params.mixcr_postanalysis && params.mode == "fastq" && params.library_generation_method == "mixcr") { + MIXCR_POSTANALYSIS ( ch_mixcr_clns ) + } + + // Collate and save software versions - // + softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info",