forked from greninger-lab/lava
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.nf
275 lines (224 loc) · 8.29 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#!/usr/bin/env nextflow
/*
========================================================================================
LAVA
========================================================================================
Longitudinal Analysis of Viral Alleles
#### Homepage / Documentation
https://github.com/vpeddu/lava
----------------------------------------------------------------------------------------
*/
// Using the Nextflow DSL-2 to account for the logic flow of this workflow
// NOTE(review): 'preview' DSL-2 syntax — used by Nextflow releases before DSL-2
// became stable; newer Nextflow versions use 'nextflow.enable.dsl=2' instead.
nextflow.preview.dsl=2
// Prints the usage/help text describing the pipeline's command-line flags.
def helpMessage() {
    log.info"""
    LAVA: Longitudinal Analysis of Viral Alleles
    Usage:
    An example command for running the pipeline is as follows:
    nextflow run vpeddu/lava \\
        --CONTROL_FASTQ first_sample.fastq \\
        --METADATA metadata.csv \\
        --GENBANK MN908947.3 \\
        --OUTDIR output/
    --CONTROL_FASTQ  The fastq reads for the first sample in
                     your longitudinal analysis [REQUIRED]
    --METADATA       Required argument: A two column csv - the first column is the
                     path to all the fastqs you wish to include in your analysis.
                     All fastqs that you want to include need to be specified in
                     this file AND be located in the folder from which you are
                     running lava. The second column is the temporal separation
                     between the samples. This is unitless so you can input
                     passage number, days, or whatever condition your experiment
                     happens to have. [REQUIRED]
    --OUTDIR         Output directory [REQUIRED]
    --FASTA          Specify a reference fasta with the majority consensus of the
                     control fastq. This option must be used with --GFF to
                     specify the protein annotations relative to the start of this
                     fasta. [REQUIRED IF NOT --GENBANK]
    --GFF            Specify a reference gff file with the protein annotations for
                     the reference fasta supplied with --FASTA. This option
                     must be paired with --FASTA. [REQUIRED IF NOT --GENBANK]
    --GENBANK        Provide a Genbank accession number. This record will be used
                     to generate a majority consensus from the control fastq, and
                     this consensus will be annotated from the downloaded genbank
                     record as well. [REQUIRED IF NOT --FASTA + --GFF]
    --ALLELE_FREQ    Specify an allele frequency percentage to cut off - with a
                     minimum of 1 percent - in whole numbers.
    --NUC            Results are listed as nucleotide changes not amino acid
                     changes. Do not use with --PNG.
    --PNG            Output results as a png. Do not use with --NUC.
    --DEDUPLICATE    Optional flag, will perform automatic removal of PCR
                     duplicates via DeDup.
    """.stripIndent()
}
/*
 * SET UP CONFIGURATION VARIABLES
 */
// Show help message
params.help = false
if (params.help){
helpMessage()
exit 0
}
// Default parameter values. String sentinels ('False', 'NO_FILE', 'NO_VAL',
// 'false') rather than booleans/nulls are used so the values can be passed
// straight into processes as command-line-friendly strings.
params.OUTDIR= false
params.GENBANK = 'False'
params.GFF = 'False'
params.FASTA = 'NO_FILE'
params.DEDUPLICATE = 'false'
params.ALLELE_FREQ = 'NO_VAL'
// Resolve the required metadata CSV (columns: Sample,Passage) to a file object.
// NOTE(review): no existence check here — a missing --METADATA fails at file().
METADATA_FILE = file(params.METADATA)
/*
 * Import the processes used in this workflow
 */
// NOTE(review): this is the pre-release DSL-2 include syntax (no braces),
// matching the nextflow.preview.dsl=2 declaration above; stable DSL-2 uses
// include { Name } from './Modules.nf'.
include CreateGFF from './Modules.nf'
include Alignment_prep from './Modules.nf'
include Align_samples from './Modules.nf'
include Pipeline_prep from './Modules.nf'
include Create_VCF from './Modules.nf'
include Ref_done from './Modules.nf'
include Extract_variants from './Modules.nf'
include Annotate_complex from './Modules.nf'
include Annotate_complex_first_passage from './Modules.nf'
include Generate_output from './Modules.nf'
// Resolve input files up front.
// NOTE(review): file(params.CONTROL_FASTQ) executes before the
// --CONTROL_FASTQ presence check further down, so an unset flag may
// fail here first rather than producing the friendly error message.
CONTROL_FASTQ = file(params.CONTROL_FASTQ)
FASTA = file(params.FASTA)
// Error handling for input flags.
// NOTE(review): CONTROL_FASTQ is already resolved via file(params.CONTROL_FASTQ)
// above, so a missing --CONTROL_FASTQ may fail there before this check runs.
if (!params.CONTROL_FASTQ){
println("Must provide control FASTQ with --CONTROL_FASTQ")
exit(1)
}
// An output directory is required.
if (params.OUTDIR == false) {
println( "Must provide an output directory with --OUTDIR")
exit(1)
}
// --GENBANK is mutually exclusive with --FASTA / --GFF: the reference either
// comes from a downloaded GenBank record or from user-supplied files, not both.
if((params.GENBANK != "False") && ((params.FASTA != "NO_FILE") || (params.GFF != "False"))){
println("--GENBANK cannot be used with --FASTA or --GFF")
exit(1)
}
// --FASTA and --GFF must always be supplied together.
if( (params.FASTA != "NO_FILE") && params.GFF == 'False'){
println('--GFF needs to be specified with --FASTA')
exit(1)
}
if( (params.GFF != "False") && params.FASTA == 'NO_FILE'){
println('--FASTA needs to be specified with --GFF')
exit(1)
}
// At least one reference source must be given.
if(params.GFF == "False" && params.FASTA == 'NO_FILE' && params.GENBANK == "False"){
println('Either --GENBANK or --FASTA + --GFF are required flags')
exit(1)
}
// Normalize OUTDIR to end with a trailing slash so downstream path
// concatenation is consistent.
// NOTE(review): reassigning params works under preview DSL-2 but params are
// read-only in newer Nextflow releases — confirm before upgrading.
if (!params.OUTDIR.endsWith("/")){
params.OUTDIR = "${params.OUTDIR}/"
}
// Parse the metadata CSV into a channel of (fastq file, passage) tuples.
// header:true skips the header row and exposes the 'Sample' and 'Passage'
// columns by name.
// NOTE(review): the original built input_read_ch twice (first with
// header:false); that first channel was dead code, immediately overwritten,
// and has been removed.
input_read_ch = Channel
    .fromPath(METADATA_FILE)
    .splitCsv(header:true)
    .map{ row-> tuple(file(row.Sample), (row.Passage)) }
// Run the workflow: wires the processes imported from Modules.nf together.
// Process semantics live in Modules.nf (not visible here); comments below
// describe only the data flow shown by this file.
workflow {
//fml()
// Print the ASCII-art version banner to the log.
log.info nfcoreHeader()
// Build the reference/annotation inputs, either from a GenBank accession
// or from the user-supplied FASTA + GFF (see CreateGFF in Modules.nf).
CreateGFF (
params.GENBANK,
CONTROL_FASTQ,
file(params.FASTA),
file(params.GFF)
)
// Prepare the reference outputs of CreateGFF for alignment.
Alignment_prep (
CreateGFF.out[0],
CreateGFF.out[1],
CreateGFF.out[2]
)
// Align every sample from the metadata channel; input_read_ch.first()
// presumably identifies the control (first) sample — see Modules.nf.
Align_samples (
input_read_ch,
Alignment_prep.out[0],
input_read_ch.first(),
params.DEDUPLICATE
)
// Collect all alignments before downstream per-cohort preparation.
Pipeline_prep (
Align_samples.out[0].collect(),
CreateGFF.out[2],
CreateGFF.out[3],
Alignment_prep.out[0]
)
// Call variants per aligned sample.
Create_VCF (
CreateGFF.out[3],
Pipeline_prep.out[2],
Align_samples.out[0],
Alignment_prep.out[1],
input_read_ch.first(),
Alignment_prep.out[2]
)
// Control/reference-sample processing branch.
Ref_done (
input_read_ch.first(),
params.ALLELE_FREQ,
Create_VCF.out[0],
CreateGFF.out[3],
Pipeline_prep.out[3],
Align_samples.out[1],
METADATA_FILE
)
// Pull variants out of the VCFs for annotation.
Extract_variants (
input_read_ch.first(),
Create_VCF.out[1],
METADATA_FILE
)
Annotate_complex(
Extract_variants.out[0]
)
Annotate_complex_first_passage(
Ref_done.out[0],
)
// Merge everything (annotations, prep outputs, alignment/VCF collections)
// into the final report.
Generate_output(
Annotate_complex_first_passage.out,
Annotate_complex.out[0].collect(),
Annotate_complex.out[1].collect(),
Annotate_complex.out[2].collect(),
Annotate_complex.out[3].collect(),
Pipeline_prep.out[0],
Pipeline_prep.out[1],
Align_samples.out[2].collect(),
Create_VCF.out[2].collect()
)
// NOTE(review): 'publish:' is preview-DSL-2 syntax, removed in stable DSL-2
// in favor of publishDir — confirm before upgrading Nextflow.
publish:
Generate_output.out to: "${params.OUTDIR}" , mode: 'copy'
}
// Returns the multi-line ASCII-art banner (logo + version string) that the
// workflow prints to the log at startup via log.info.
def nfcoreHeader() {
return """
ooO
ooOOOo
oOOOOOOoooo
ooOOOooo oooo
/vvv\\
/V V V\\
/V V V\\
/ \\ oh wow look at these alleles
/ \\ / /
/ \\ o o
__ / \\ /- o /-
/\\ / \\ /\\ -/- /\\
/\\
___ _______ __ __ _______
| | | _ || | | || _ |
| | | |_| || |_| || |_| |
| | | || || |
| |___ | || || |
| || _ | | | | _ |
|_______||__| |__| |___| |__| |__| Version 2
""".stripIndent()
}