Skip to content

Commit

Permalink
Merge pull request #305 from ggabernet/cellranger_vdj
Browse files Browse the repository at this point in the history
Updates cellranger_vdj
  • Loading branch information
ggabernet authored Feb 27, 2024
2 parents ebbd620 + afa4c1d commit a0ad699
Show file tree
Hide file tree
Showing 13 changed files with 229 additions and 84 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

- [#294](https://github.com/nf-core/airrflow/pull/294) Merge template updates nf-core/tools v2.11.1
- [#299](https://github.com/nf-core/airrflow/pull/299) Add profile for common NEB and TAKARA protocols
- [#305](https://github.com/nf-core/airrflow/pull/305) Add possibility to merge multi-lane samples when starting from fastq files

### `Fixed`

Expand Down
9 changes: 6 additions & 3 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,12 @@ def check_samplesheet(file_in, assembled):

## Check that sample ids are unique
if len(tab["sample_id"]) != len(set(tab["sample_id"])):
print_error(
"Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample."
)
if assembled:
print_error(
"Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample."
)
else:
print("WARNING: Sample IDs are not unique! FastQs with the same sample ID will be merged.")

## Check that pcr_target_locus is IG or TR
for val in tab["pcr_target_locus"]:
Expand Down
11 changes: 6 additions & 5 deletions bin/reveal_add_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,12 @@ if (!("INPUTID" %in% names(opt))) {
# Read metadata file
metadata <- read.csv(opt$METADATA, sep = "\t", header = TRUE, stringsAsFactors = F)

# Samples sequenced over multiple lanes have one metadata row per lane.
# Drop the per-lane filename_* columns and deduplicate so that exactly one row per sample remains.
metadata <- metadata %>%
filter(sample_id == opt$INPUTID)
dplyr::filter(sample_id == opt$INPUTID) %>%
dplyr::select(!starts_with("filename_")) %>%
dplyr::distinct()

if (nrow(metadata) != 1) {
stop("Expecting nrow(metadata) == 1; nrow(metadata) == ", nrow(metadata), " found")
Expand All @@ -81,10 +85,7 @@ internal_fields <-
"id",
"filetype",
"valid_single_cell",
"valid_pcr_target_locus",
"filename_R1",
"filename_R2",
"filename_I1"
"valid_pcr_target_locus"
)
metadata <- metadata[, !colnames(metadata) %in% internal_fields]

Expand Down
1 change: 0 additions & 1 deletion conf/test_10x_sc.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ params {

// params
mode = 'fastq'
sc_raw = true
library_generation_method = 'sc_10x_genomics'
clonal_threshold = 0

Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"cat/fastq": {
"branch": "master",
"git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882",
"installed_by": ["modules"]
},
"cellranger/mkvdjref": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/cat/fastq/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 80 additions & 0 deletions modules/nf-core/cat/fastq/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions modules/nf-core/cat/fastq/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ params {
// Single cell raw input options
// -----------------------
reference_10x = null
sc_raw = false


// -----------------------
Expand Down Expand Up @@ -296,7 +295,6 @@ profiles {
test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' }
test_nocluster { includeConfig 'conf/test_nocluster.config' }
test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' }
test_igblast { includeConfig 'conf/test_igblast.config' }
test_10x_sc { includeConfig 'conf/test_10x_sc.config' }
test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' }
test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' }
Expand Down
6 changes: 0 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -459,12 +459,6 @@
"description": "Path to the reference directory required by cellranger. Can either be directory or tar.gz.",
"help_text": "See for [IMGT](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt) or [default](https://www.10xgenomics.com/support/software/cell-ranger/downloads).",
"fa_icon": "fas fa-database"
},
"sc_raw": {
"type": "boolean",
"description": "Must be given when raw single cell data should be run.",
"help_text": "Must be given when raw single cell data should be run.",
"fa_icon": "fas fa-database"
}
},
"help_text": "Options for running raw single cell data.",
Expand Down
47 changes: 33 additions & 14 deletions subworkflows/local/fastq_input_check.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
*/

include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
//TODO: when enchantr supports input samplesheet from raw sequencing, update code here to commented one.
//include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input'
include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main'

workflow FASTQ_INPUT_CHECK {
take:
Expand All @@ -15,22 +14,41 @@ workflow FASTQ_INPUT_CHECK {
.tsv
.splitCsv ( header:true, sep:'\t' )
.map { create_fastq_channels(it) }
.dump (tag: 'fastq_channel_before_merge_samples')
.groupTuple(by: [0])
.dump(tag: 'fastq_channel_after_merge_samples_grouped')
.branch {
meta, fastqs ->
single: fastqs.size() == 1
return [ meta, fastqs.flatten() ]
multiple: fastqs.size() > 1
return [ meta, fastqs.flatten() ]
}
.set { ch_reads }
// VALIDATE_INPUT(
// samplesheet,
// params.miairr,
// params.collapseby,
// params.cloneby
// )
ch_versions = SAMPLESHEET_CHECK.out.versions

// Merge multi-lane FASTQ files per sample for all protocols except 10x Genomics (cellranger handles multiple FASTQ files per sample itself)
if (params.library_generation_method == 'sc_10x_genomics') {

ch_merged_reads = ch_reads.single.mix( ch_reads.multiple )

} else {

CAT_FASTQ (
ch_reads.multiple
)
.reads
.mix( ch_reads.single )
.dump (tag: 'fastq_channel_after_merge_samples')
.set { ch_merged_reads }

// VALIDATE_INPUT.out.validated_input
// .splitCsv(header: true, sep:'\t')
// .map { get_meta(it) }
// .set{ ch_reads }
ch_versions = ch_versions.mix( CAT_FASTQ.out.versions )

}

emit:
reads = ch_reads // channel: [ val(meta), [ reads ] ]
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
reads = ch_merged_reads // channel: [ val(meta), [ reads ] ]
versions = ch_versions // channel: [ versions.yml ]
samplesheet = SAMPLESHEET_CHECK.out.tsv // tsv metadata file
}

Expand All @@ -47,6 +65,7 @@ def create_fastq_channels(LinkedHashMap col) {
meta.filetype = "fastq"
meta.single_cell = col.single_cell.toLowerCase()
meta.locus = col.pcr_target_locus
meta.single_end = false

def array = []
if (!file(col.filename_R1).exists()) {
Expand Down
Loading

0 comments on commit a0ad699

Please sign in to comment.