-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathneoflow_hlatyping.nf
146 lines (104 loc) · 3.41 KB
/
neoflow_hlatyping.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env nextflow
params.help = false
params.reads = null
params.hla_ref_dir = "./"
params.seqtype = "dna"
params.singleEnd = false
params.out_dir = "./"
params.cpu = 6
/* Prints help when asked for and exits */
def helpMessage() {
log.info"""
=========================================
neoflow => HLA typing
=========================================
Usage:
nextflow run neoflow_hlatyping.nf
Arguments:
--reads Reads data in fastq.gz or fastq format. For example, "*_{1,2}.fq.gz"
--hla_ref_dir HLA reference folder
--seqtype Read type, dna or rna. Default is dna.
--singleEnd Single end or not, default is false (pair end reads)
--cpu The number of CPUs, default is 6.
--out_dir Output folder, default is "./"
--help Print help message
""".stripIndent()
}
// Show help emssage
if (params.help){
helpMessage()
exit 0
}
out_dir = file(params.out_dir)
if(!out_dir.isDirectory()){
out_dir_result = out_dir.mkdirs()
println out_dir_result ? "Create folder: $out_dir!" : "Cannot create directory: $myDir!"
}
hla_reference_dir = file(params.hla_ref_dir)
Channel.fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 )
.ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs" + "to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." }
.set { input_data }
process reads_mapping {
echo true
tag "reads_mapping"
container "biocontainers/bwa:0.7.15"
cpus "$params.cpu"
input:
file hla_reference_dir
set val(pattern), file(reads) from input_data
output:
set val(pattern), "mapped_{1,2}.sam" into mapped_reads
script:
if(params.singleEnd == true)
"""
bwa mem -t ${params.cpu} -M ${hla_reference_dir}/hla_reference_dna.fasta ${reads} > mapped_1.sam
"""
else
"""
bwa mem -t ${params.cpu} -M ${hla_reference_dir}/hla_reference_dna.fasta ${reads[0]} > mapped_1.sam
bwa mem -t ${params.cpu} -M ${hla_reference_dir}/hla_reference_dna.fasta ${reads[1]} > mapped_2.sam
"""
}
process run_samtools{
container "zhanglab18/samtools:1.9"
cpus "$params.cpu"
input:
file hla_reference_dir
set val(pattern), file(reads) from mapped_reads
output:
set val(pattern), "mapped_{1,2}.fastq" into fished_reads
script:
if(params.singleEnd == true)
"""
# samtools view -bS mapped_1.sam > mapped_1.bam
samtools view -@ ${params.cpu} -h -F 4 -b1 mapped_1.sam > mapped_1.bam
samtools fastq mapped_1.bam > mapped_1.fastq
"""
else
"""
# samtools view -bS mapped_1.sam > mapped_1.bam
# samtools view -bS mapped_2.sam > mapped_2.bam
samtools view -@ ${params.cpu} -h -F 4 -b1 mapped_1.sam > mapped_1.bam
samtools view -@ ${params.cpu} -h -F 4 -b1 mapped_2.sam > mapped_2.bam
samtools fastq mapped_1.bam > mapped_1.fastq
samtools fastq mapped_2.bam > mapped_2.fastq
"""
}
process run_optitype {
tag "run_optitype"
container "zhanglab18/optitype:1.3.1"
cpus "$params.cpu"
publishDir "${out_dir}/hla_type/", mode: "copy", overwrite: true
input:
set val(pattern), file(reads) from fished_reads
output:
file("${pattern}") into res
script:
"""
python /opt/OptiType/OptiTypePipeline.py \
--input ${reads} \
--prefix ${pattern} \
--outdir ${pattern} \
--${params.seqtype}
"""
}