-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.nf
227 lines (177 loc) · 8.84 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
/**
 * Write the pipeline's usage text to the Nextflow log at INFO level.
 *
 * The text lists the supported command-line options, execution profiles,
 * container image and author contact. It takes no arguments and is invoked
 * when the `help` parameter is set.
 */
def helpMessage() {
    // stripIndent() removes any common leading whitespace from the literal;
    // the text itself is emitted unchanged.
    def usageText = """
================================================================
dual-crispr-process-nf
================================================================
DESCRIPTION
Process CRISPR and shRNA functional genetic screening data.
Usage:
nextflow run zuberlab/crispr-process-nf
Options:
--inputDir Input directory containing raw files. Either BAM or FASTQ files (R1 & R2).
The FASTQ files must be named <lane>_R1.fastq.gz and <lane>_R2.fastq.gz.
The BAM files must be named <lane>.bam.
(default: '01_raw')
--outputDir Output directory for processed files.
(default: '02_processed')
--library Path to sgRNA library file.
(default: 'library.txt')
The following columns are required:
- id: unique name of sgRNA
- gene: gene targeted by sgRNA
- sequence: nucleotide sequence of sgRNA
--barcodes Path to file containing barcodes for demultiplexing.
(default: 'barcodes.fasta')
The following columns are required:
- lane: name of BAM / FASTQ input file
- sample_name: name of demultiplexed sample
- barcode_R1: nucleotide sequence of the sample barcode on R1
- barcode_R2: nucleotide sequence of the sample barcode on R2
--barcode_demux_mismatches Number of mismatches allowed during demultiplexing
of barcode. (default: 0)
--barcode_demux_location Read location of the sample barcode. Only the specified read is used for demultiplexing.
Either 'both', 'forward' or 'reverse' (default: 'both')
--barcode_length Number of nucleotides in sample barcode.
(default: 4)
--spacer_seq_R1 Nucleotide sequence of spacer on R1 between
barcodes and sgRNA sequence.
(default: ATATCCCTTGGAGAAAAGCCTTGTTT)
--spacer_seq_R2 Nucleotide sequence of spacer on R2 between
barcodes and sgRNA sequence.
(default: CTTGCTATGCACTCTTGTGCTTAGCTCTGAAAC)
--guide_length Number of nucleotides in guide sequence. (default: 21)
--padding_bases_first_guide Nucleotides used for padding if first sgRNA are of
unequal length. Must be one of G, C, T, and A.
(default: GTT)
--padding_bases_matching_guide Nucleotides used for padding if matching sgRNA are of
unequal length. Must be one of G, C, T, and A.
(default: ACC)
Profiles:
standard local execution
apptainer local execution with apptainer
cbe SLURM execution with apptainer on CBE cluster
Docker:
zuberlab/dual-crispr-nf:1.0
Author:
Florian Andersch (florian.andersch@imp.ac.at)
""".stripIndent()

    log.info(usageText)
}
// Script-level default for the help flag. Nextflow gives command-line values
// precedence over assignments made in the script, so `--help` still enables it.
params.help = false

// Print the usage text and stop before any channel or process is created.
if (params.help) {
    helpMessage()
    exit 0
}

// Echo the effective parameters at start-up so each run's log records the
// settings it was executed with.
log.info ""
log.info " parameters "
log.info " =================================="
log.info " input directory : ${params.inputDir}"
log.info " output directory : ${params.outputDir}"
log.info " library file : ${params.library}"
log.info " barcode file : ${params.barcodes}"
log.info " barcode length : ${params.barcode_length}"
log.info " spacer seq R1 (nt) : ${params.spacer_seq_R1}"
log.info " spacer seq R2 (nt) : ${params.spacer_seq_R2}"
// --guide_length is listed in the usage text but was absent from this summary.
// NOTE(review): assumes params.guide_length is defined in the pipeline config,
// as the usage text's documented default suggests - confirm.
log.info " guide length : ${params.guide_length}"
log.info " demultiplex mismatches : ${params.barcode_demux_mismatches}"
log.info " sample barcode location : ${params.barcode_demux_location}"
log.info " first guide padding base : ${params.padding_bases_first_guide}"
log.info " matching guide padding base : ${params.padding_bases_matching_guide}"
log.info " =================================="
log.info ""
// Import modules
include { BAM_TO_FASTQ } from './modules/bam_to_fastq'
include { TRIM_RANDOM_BARCODE } from './modules/trim_random_barcode'
include { PROCESS_BARCODES } from './modules/process_barcodes'
include { DEMULTIPLEX } from './modules/demultiplex'
include { TRIM_BARCODE_AND_SPACER } from './modules/trim_barcode_and_spacer'
include { PROCESS_LIBRARY } from './modules/process_library'
include { BOWTIE_INDEX } from './modules/bowtie_index'
include { ALIGN } from './modules/align'
include { COUNT } from './modules/count'
include { COMBINE_COUNTS } from './modules/combine_counts'
include { FASTQC } from './modules/fastqc'
include { MULTIQC } from './modules/multiqc'
// Define input channels
// BAM input: emits one (lane, bam) tuple per *.bam file in the input
// directory; the lane key is the file's base name.
ch_input_bam = Channel.fromPath("${params.inputDir}/*.bam")
    .map { bam -> tuple(bam.baseName, bam) }

// FASTQ input: emits one (lane, [files]) tuple per lane. The lane key is the
// file name with its trailing `_R1.fastq.gz` / `_R2.fastq.gz` removed.
ch_input_fastq = Channel.fromPath("${params.inputDir}/*.fastq.gz")
    .map { fastq ->
        def lane = fastq.name.toString().replaceAll(/_R[12]\.fastq\.gz$/, '')
        tuple(lane, fastq)
    }
    // groupTuple gives no ordering guarantee by default; sorting by natural
    // (path) order makes the grouped list deterministic, R1 before R2.
    .groupTuple(sort: true)

// Single-file inputs. checkIfExists makes a wrong --barcodes / --library path
// fail at start-up with a clear error rather than inside a later process.
ch_barcodes = Channel.fromPath(params.barcodes, checkIfExists: true)
ch_library = Channel.fromPath(params.library, checkIfExists: true)
// Main workflow
// Entry workflow: wires the input channels through the imported processes,
// from raw BAM/FASTQ input to combined counts and a MultiQC report.
// Every groupTuple below uses `sort: true` so grouped file lists have a
// deterministic order (R1 before R2 for read pairs); unsorted grouping can
// change between runs, which also alters task hashes and defeats `-resume`.
workflow {
    // BAM to FASTQ conversion
    ch_fastq_from_bam = BAM_TO_FASTQ(ch_input_bam)

    // Combine BAM-derived and direct FASTQ inputs
    ch_all_fastq = ch_fastq_from_bam.mix(ch_input_fastq)

    // Trim random barcode
    ch_trimmed_random = TRIM_RANDOM_BARCODE(ch_all_fastq)

    // Process barcodes
    ch_processed_barcodes = PROCESS_BARCODES(ch_barcodes)

    // Key each processed barcode file by lane (file name minus its trailing
    // `_R1.fasta` / `_R2.fasta`), group per lane, then join with the trimmed
    // reads of the same lane.
    ch_trimmed_random_barcodes = ch_processed_barcodes
        .flatten()
        .map { barcode ->
            def lane = barcode.name.toString().replaceAll(/_R[12]\.fasta$/, '')
            [lane, barcode]
        }
        .groupTuple(sort: true)
        .join(ch_trimmed_random)

    // Demultiplex
    ch_demuxed = DEMULTIPLEX(ch_trimmed_random_barcodes)

    // Flatten demultiplexed files into (lane, sample id, file) and regroup so
    // that each (lane, sample id) carries its own list of read files.
    ch_demuxed_flattened = ch_demuxed
        .flatMap { lane, files ->
            // A lone Path would be iterated by path component; wrap it so a
            // single-file output is handled like a list.
            def fileList = files instanceof Collection ? files : [files]
            fileList.collect { fastq ->
                def id = fastq.name.toString().replaceAll(/_R[12]\.fastq\.gz$/, '')
                [lane, id, fastq]
            }
        }
        .groupTuple(by: [0, 1], sort: true)

    // Filter out samples with unknown barcodes (sample id ends with "#unknown")
    ch_demuxed_flattened_filtered = ch_demuxed_flattened
        .filter { !it[1].toString().endsWith("#unknown") }

    // Trim barcode and spacer
    ch_trimmed_spacer = TRIM_BARCODE_AND_SPACER(ch_demuxed_flattened_filtered)

    // Process library
    ch_library_out = PROCESS_LIBRARY(ch_library)

    // Bowtie index
    ch_bt2_index = BOWTIE_INDEX(ch_library_out.fasta)

    // Pair every trimmed sample with the bowtie index
    ch_trimmed_spacer_combined = ch_trimmed_spacer
        .combine(ch_bt2_index)

    // Align
    ch_aligned = ALIGN(ch_trimmed_spacer_combined)

    // Group aligned files by lane and attach the library SAF annotation
    ch_grouped_aligned = ch_aligned.alignedFiles
        .map { lane, id, aligned -> tuple(lane, aligned) }
        .groupTuple(sort: true)
        .combine(ch_library_out.saf)

    // Count
    ch_counted = COUNT(ch_grouped_aligned)

    // Combine counts
    COMBINE_COUNTS(ch_counted.countedFiles.collect(), ch_library)

    // Collect all FASTQ files for QC: the per-lane inputs plus every
    // demultiplexed sample (the unfiltered channel, so "#unknown" is included)
    ch_fastq_files = ch_all_fastq
        .mix(ch_demuxed_flattened.map { lane, id, files -> tuple(id, files) })

    // FastQC
    ch_fastqc = FASTQC(ch_fastq_files)

    // Combine FastQC files, alignment results, and featureCounts results into
    // a single list for MultiQC
    ch_multiqc_files = ch_fastqc.collect()
        .mix(ch_aligned.alignResults.collect())
        .mix(ch_aligned.alignStats.collect())
        .mix(ch_aligned.alignFlagstats.collect())
        .mix(ch_counted.featureCountsResults.collect())
        .collect()

    // MultiQC
    MULTIQC(ch_multiqc_files)
}
// On completion
// Completion handler: prints a one-word status derived from workflow.success
// once the run has finished.
workflow.onComplete {
    def status = workflow.success ? "COMPLETED!" : "FAILED"
    println(status)
}