From bc1e3167d56060f5e0fd1fcbfbf6a1cd4e87524f Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Fri, 16 Feb 2024 12:46:55 -0500
Subject: [PATCH 1/6] avoid creating extra param

---
 conf/test_10x_sc.config            |  1 -
 nextflow.config                    |  2 -
 nextflow_schema.json               |  6 --
 subworkflows/local/sc_raw_input.nf | 97 ++++++++++++++----------------
 workflows/airrflow.nf              |  6 +-
 5 files changed, 49 insertions(+), 63 deletions(-)

diff --git a/conf/test_10x_sc.config b/conf/test_10x_sc.config
index 39a7b3d4..76936ef9 100644
--- a/conf/test_10x_sc.config
+++ b/conf/test_10x_sc.config
@@ -18,7 +18,6 @@ params {
 
     // params
     mode = 'fastq'
-    sc_raw = true
     library_generation_method = 'sc_10x_genomics'
     clonal_threshold = 0
 
diff --git a/nextflow.config b/nextflow.config
index 8a69d532..d84a0c59 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -120,7 +120,6 @@ params {
     // Single cell raw input options
     // -----------------------
     reference_10x = null
-    sc_raw = false
 
 
     // -----------------------
@@ -296,7 +295,6 @@ profiles {
     test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' }
     test_nocluster { includeConfig 'conf/test_nocluster.config' }
     test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' }
-    test_igblast { includeConfig 'conf/test_igblast.config' }
     test_10x_sc { includeConfig 'conf/test_10x_sc.config' }
     test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' }
     test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d0d56282..4224ebd1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -459,12 +459,6 @@
                     "description": "Path to the reference directory required by cellranger. Can either be directory or tar.gz.",
                     "help_text": "See for [IMGT](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt) or [default](https://www.10xgenomics.com/support/software/cell-ranger/downloads).",
                     "fa_icon": "fas fa-database"
-                },
-                "sc_raw": {
-                    "type": "boolean",
-                    "description": "Must be given when raw single cell data should be run.",
-                    "help_text": "Must be given when raw single cell data should be run.",
-                    "fa_icon": "fas fa-database"
                 }
             },
             "help_text": "Options for running raw single cell data.",
diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index 12a4b640..25e5b8c9 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -26,69 +26,64 @@ workflow SC_RAW_INPUT {
     ch_reads = FASTQ_INPUT_CHECK.out.reads
 
     // validate library generation method parameter
-    if (params.library_generation_method == 'sc_10x_genomics') {
-        if (params.vprimers) {
-            error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
-        } else if (params.race_linker) {
-            error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option."
-        }
-        if (params.cprimers)  {
-            error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require C-region primers, please provide a reference file instead or select another library method option."
-        }
-        if (params.umi_length > 0)  {
-            error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
-        }
-        if (params.reference_10x)  {
-            // necessary to allow tar.gz files as input so that tests can run
-            if (params.reference_10x.endsWith(".tar.gz")){
-                UNZIP_CELLRANGERDB(
-                    params.reference_10x
-                )
-                UNZIP_CELLRANGERDB.out.unzipped.set { ch_sc_reference }
-            } else {
-                ch_sc_reference = Channel.fromPath(params.reference_10x, checkIfExists: true)
-            }
+    if (params.vprimers) {
+        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
+    } else if (params.race_linker) {
+        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option."
+    }
+    if (params.cprimers)  {
+        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require C-region primers, please provide a reference file instead or select another library method option."
+    }
+    if (params.umi_length > 0)  {
+        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
+    }
+    if (params.reference_10x)  {
+        // necessary to allow tar.gz files as input so that tests can run
+        if (params.reference_10x.endsWith(".tar.gz")){
+            UNZIP_CELLRANGERDB(
+                params.reference_10x
+            )
+            UNZIP_CELLRANGERDB.out.unzipped.set { ch_sc_reference }
         } else {
-            error "The transcript-specific primer, 5'-RACE, UMI library generation method requires you to provide a reference file."
+            ch_sc_reference = Channel.fromPath(params.reference_10x, checkIfExists: true)
         }
+    } else {
+        error "The transcript-specific primer, 5'-RACE, UMI library generation method requires you to provide a reference file."
+    }
 
-        // run cellranger vdj
-        CELLRANGER_VDJ (
-        ch_reads,
-        ch_sc_reference
-        )
-        ch_versions = ch_versions.mix(CELLRANGER_VDJ.out.versions)
+    // run cellranger vdj
+    CELLRANGER_VDJ (
+    ch_reads,
+    ch_sc_reference
+    )
+    ch_versions = ch_versions.mix(CELLRANGER_VDJ.out.versions)
 
-        ch_cellranger_out = CELLRANGER_VDJ.out.outs
+    ch_cellranger_out = CELLRANGER_VDJ.out.outs
 
-        ch_cellranger_out
-            .map { meta, out_files ->
-                    [ meta, out_files.find { it.endsWith("airr_rearrangement.tsv") } ]
-                }
-            .set { ch_cellranger_airr }
+    ch_cellranger_out
+        .map { meta, out_files ->
+                [ meta, out_files.find { it.endsWith("airr_rearrangement.tsv") } ]
+            }
+        .set { ch_cellranger_airr }
 
-        // TODO : add VALIDATE_INPUT Module
-        // this module requires input in csv format... Might need to create this in an extra module
+    // TODO : add VALIDATE_INPUT Module
+    // this module requires input in csv format... Might need to create this in an extra module
 
-        // rename tsv file to unique name
-        RENAME_FILE_TSV(
-                    ch_cellranger_airr
-                )
-            .set { ch_renamed_tsv }
+    // rename tsv file to unique name
+    RENAME_FILE_TSV(
+                ch_cellranger_airr
+            )
+        .set { ch_renamed_tsv }
 
-        // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information)
-        CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
-                    RENAME_FILE_TSV.out.file
-                )
+    // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information)
+    CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
+                RENAME_FILE_TSV.out.file
+            )
 
-        ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
+    ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
 
     // TODO: here you can add support for MiXCR sc protocols.
 
-    } else {
-        error "The provided library generation method is not supported. Please check the docs for `--library_generation_method`."
-    }
-
     emit:
     versions = ch_versions
     // complete cellranger output
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index d0878af9..4fcc6f18 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -97,10 +97,12 @@ workflow AIRRFLOW {
     if ( params.mode == "fastq" ) {
 
         // SC:Perform sequence assembly if input type is fastq from single-cell sequencing data (currently only 10XGenomics)
-        if (params.sc_raw) {
+        if (params.library_generation_method == "sc_10x_genomics") {
+
             SC_RAW_INPUT(
                 ch_input
             )
+
             ch_fasta                                = SC_RAW_INPUT.out.fasta
             ch_versions                             = ch_versions.mix(SC_RAW_INPUT.out.versions)
             ch_cellranger_airr                      = SC_RAW_INPUT.out.airr
@@ -122,8 +124,6 @@ workflow AIRRFLOW {
             ch_fastqc_postassembly_mqc              = Channel.empty()
         } else {
             // Perform sequence assembly if input type is fastq from bulk sequencing data
-            // TODO make this part run from ch_reads_split.bulk! -> other input, FASTQ_INPUT_CHECK is not needed then anymore
-
             SEQUENCE_ASSEMBLY(
                 ch_input,
                 DATABASES.out.igblast.collect()

From 2bb6952c58ebd66cf0a1fe75883365f7767d12ec Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Sun, 18 Feb 2024 22:12:22 -0500
Subject: [PATCH 2/6] merge fastqs with multiple lanes

---
 CHANGELOG.md                              |  1 +
 bin/check_samplesheet.py                  |  9 ++-
 modules.json                              |  5 ++
 modules/nf-core/cat/fastq/environment.yml |  7 ++
 modules/nf-core/cat/fastq/main.nf         | 80 +++++++++++++++++++++++
 modules/nf-core/cat/fastq/meta.yml        | 42 ++++++++++++
 subworkflows/local/fastq_input_check.nf   | 47 +++++++++----
 7 files changed, 174 insertions(+), 17 deletions(-)
 create mode 100644 modules/nf-core/cat/fastq/environment.yml
 create mode 100644 modules/nf-core/cat/fastq/main.nf
 create mode 100644 modules/nf-core/cat/fastq/meta.yml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 944bc3e1..b88df086 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 - [#294](https://github.com/nf-core/airrflow/pull/294) Merge template updates nf-core/tools v2.11.1
 - [#299](https://github.com/nf-core/airrflow/pull/299) Add profile for common NEB and TAKARA protocols
+- Add possibility to merge multi-lane samples when starting from fastq files
 
 ### `Fixed`
 
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index e27d87e8..9867c446 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -160,9 +160,12 @@ def check_samplesheet(file_in, assembled):
 
         ## Check that sample ids are unique
         if len(tab["sample_id"]) != len(set(tab["sample_id"])):
-            print_error(
-                "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample."
-            )
+            if assembled:
+                print_error(
+                    "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample."
+                )
+            else:
+                print("WARNING: Sample IDs are not unique! FastQs with the same sample ID will be merged.")
 
         ## Check that pcr_target_locus is IG or TR
         for val in tab["pcr_target_locus"]:
diff --git a/modules.json b/modules.json
index 04cd992c..19799719 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cat/fastq": {
+                        "branch": "master",
+                        "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
+                        "installed_by": ["modules"]
+                    },
                     "cellranger/mkvdjref": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml
new file mode 100644
index 00000000..bff93add
--- /dev/null
+++ b/modules/nf-core/cat/fastq/environment.yml
@@ -0,0 +1,7 @@
+name: cat_fastq
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
new file mode 100644
index 00000000..3d963784
--- /dev/null
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -0,0 +1,80 @@
+process CAT_FASTQ {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(reads, stageAs: "input*/*")
+
+    output:
+    tuple val(meta), path("*.merged.fastq.gz"), emit: reads
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size >= 1) {
+            """
+            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size >= 2) {
+            def read1 = []
+            def read2 = []
+            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
+            """
+            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
+            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size > 1) {
+            """
+            touch ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size > 2) {
+            """
+            touch ${prefix}_1.merged.fastq.gz
+            touch ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+}
diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
new file mode 100644
index 00000000..db4ac3c7
--- /dev/null
+++ b/modules/nf-core/cat/fastq/meta.yml
@@ -0,0 +1,42 @@
+name: cat_fastq
+description: Concatenates fastq files
+keywords:
+  - cat
+  - fastq
+  - concatenate
+tools:
+  - cat:
+      description: |
+        The cat utility reads files sequentially, writing them to the standard output.
+      documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files to be concatenated.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: Merged fastq file
+      pattern: "*.{merged.fastq.gz}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf
index 52f87190..b5165871 100644
--- a/subworkflows/local/fastq_input_check.nf
+++ b/subworkflows/local/fastq_input_check.nf
@@ -3,8 +3,7 @@
  */
 
 include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
-//TODO: when enchantr supports input samplesheet from raw sequencing, update code here to commented one.
-//include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input'
+include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main'
 
 workflow FASTQ_INPUT_CHECK {
     take:
@@ -15,22 +14,41 @@ workflow FASTQ_INPUT_CHECK {
         .tsv
         .splitCsv ( header:true, sep:'\t' )
         .map { create_fastq_channels(it) }
+        .dump (tag: 'fastq_channel_before_merge_samples')
+        .groupTuple(by: [0])
+        .dump(tag: 'fastq_channel_after_merge_samples_grouped')
+        .branch {
+            meta, fastqs ->
+                single: fastqs.size() == 1
+                    return [ meta, fastqs.flatten() ]
+                multiple: fastqs.size() > 1
+                    return [ meta, fastqs.flatten() ]
+        }
         .set { ch_reads }
-    // VALIDATE_INPUT(
-    //     samplesheet,
-    //     params.miairr,
-    //     params.collapseby,
-    //     params.cloneby
-    // )
+    ch_versions = SAMPLESHEET_CHECK.out.versions
+
+    // Merge multi-lane FASTQ files per sample, except for 10x Genomics data (cellranger accepts multiple FASTQ files per sample)
+    if (params.library_generation_method == 'sc_10x_genomics') {
+
+        ch_merged_reads = ch_reads.single.mix( ch_reads.multiple )
+
+    } else {
+
+        CAT_FASTQ (
+            ch_reads.multiple
+        )
+        .reads
+        .mix( ch_reads.single )
+        .dump (tag: 'fastq_channel_after_merge_samples')
+        .set { ch_merged_reads }
 
-    // VALIDATE_INPUT.out.validated_input
-    //                     .splitCsv(header: true, sep:'\t')
-    //                     .map { get_meta(it) }
-    //                     .set{ ch_reads }
+        ch_versions = ch_versions.mix( CAT_FASTQ.out.versions )
+
+    }
 
     emit:
-    reads = ch_reads // channel: [ val(meta), [ reads ] ]
-    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
+    reads = ch_merged_reads // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions // channel: [ versions.yml ]
     samplesheet = SAMPLESHEET_CHECK.out.tsv // tsv metadata file
 }
 
@@ -47,6 +65,7 @@ def create_fastq_channels(LinkedHashMap col) {
     meta.filetype           = "fastq"
     meta.single_cell        = col.single_cell.toLowerCase()
     meta.locus              = col.pcr_target_locus
+    meta.single_end         = false
 
     def array = []
     if (!file(col.filename_R1).exists()) {

From f1caa7b8c8b828ffb2c42ad03420e002d1554828 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Sun, 18 Feb 2024 22:27:20 -0500
Subject: [PATCH 3/6] fix text

---
 subworkflows/local/sc_raw_input.nf | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index 25e5b8c9..109947b8 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -27,15 +27,15 @@ workflow SC_RAW_INPUT {
 
     // validate library generation method parameter
     if (params.vprimers) {
-        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
+        error "The single-cell 10X genomics library generation method does not require V-region primers, please provide a reference file instead or select another library method option."
     } else if (params.race_linker) {
-        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option."
+        error "The single-cell 10X genomics library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option."
     }
     if (params.cprimers)  {
-        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require C-region primers, please provide a reference file instead or select another library method option."
+        error "The single-cell 10X genomics library generation method does not require C-region primers, please provide a reference file instead or select another library method option."
     }
     if (params.umi_length > 0)  {
-        error "The transcript-specific primer, 5'-RACE, UMI library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
+        error "The single-cell 10X genomics library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option."
     }
     if (params.reference_10x)  {
         // necessary to allow tar.gz files as input so that tests can run
@@ -48,7 +48,7 @@ workflow SC_RAW_INPUT {
             ch_sc_reference = Channel.fromPath(params.reference_10x, checkIfExists: true)
         }
     } else {
-        error "The transcript-specific primer, 5'-RACE, UMI library generation method requires you to provide a reference file."
+        error "The single-cell 10X genomics library generation method requires you to provide a reference file."
     }
 
     // run cellranger vdj
@@ -84,6 +84,7 @@ workflow SC_RAW_INPUT {
 
     // TODO: here you can add support for MiXCR sc protocols.
 
+
     emit:
     versions = ch_versions
     // complete cellranger output

From 7b8b8c9a8f15554e032bcbe00c7fa59e8c2e5f82 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Sun, 18 Feb 2024 22:29:27 -0500
Subject: [PATCH 4/6] fix lint

---
 workflows/airrflow.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index 4fcc6f18..ede383fa 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -145,7 +145,7 @@ workflow AIRRFLOW {
             ch_presto_assemblepairs_logs    = SEQUENCE_ASSEMBLY.out.presto_assemblepairs_logs
             ch_presto_collapseseq_logs      = SEQUENCE_ASSEMBLY.out.presto_collapseseq_logs
             ch_presto_splitseq_logs         = SEQUENCE_ASSEMBLY.out.presto_splitseq_logs
-       }
+        }
 
     } else if ( params.mode == "assembled" ) {
 

From 68a73290accfeba97b83ded4ab4e4528e5f9d799 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Mon, 19 Feb 2024 09:01:04 -0500
Subject: [PATCH 5/6] fix metadata merge

---
 bin/reveal_add_metadata.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bin/reveal_add_metadata.R b/bin/reveal_add_metadata.R
index f2ff5b5f..1745da89 100755
--- a/bin/reveal_add_metadata.R
+++ b/bin/reveal_add_metadata.R
@@ -61,8 +61,12 @@ if (!("INPUTID" %in% names(opt))) {
 # Read metadata file
 metadata <- read.csv(opt$METADATA, sep = "\t", header = TRUE, stringsAsFactors = F)
 
+# Samples merged over multiple lanes appear in several metadata rows.
+# Drop the filename_* columns and de-duplicate so that one row per sample remains.
 metadata <- metadata %>%
-    filter(sample_id == opt$INPUTID)
+    dplyr::filter(sample_id == opt$INPUTID) %>%
+    dplyr::select(!starts_with("filename_")) %>%
+    dplyr::distinct()
 
 if (nrow(metadata) != 1) {
     stop("Expecting nrow(metadata) == 1; nrow(metadata) == ", nrow(metadata), " found")
@@ -81,10 +85,7 @@ internal_fields <-
         "id",
         "filetype",
         "valid_single_cell",
-        "valid_pcr_target_locus",
-        "filename_R1",
-        "filename_R2",
-        "filename_I1"
+        "valid_pcr_target_locus"
     )
 metadata <- metadata[, !colnames(metadata) %in% internal_fields]
 

From afa4c1dbbad344ec6156855a5673573e874f9cd0 Mon Sep 17 00:00:00 2001
From: Gisela Gabernet <gisela.gabernet@gmail.com>
Date: Tue, 20 Feb 2024 10:00:53 -0500
Subject: [PATCH 6/6] add collect

---
 subworkflows/local/sc_raw_input.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index 109947b8..8f46cbfd 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -53,8 +53,8 @@ workflow SC_RAW_INPUT {
 
     // run cellranger vdj
     CELLRANGER_VDJ (
-    ch_reads,
-    ch_sc_reference
+        ch_reads,
+        ch_sc_reference.collect()
     )
     ch_versions = ch_versions.mix(CELLRANGER_VDJ.out.versions)