-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathliftover_vcf_picard.wdl
171 lines (141 loc) · 3.9 KB
/
liftover_vcf_picard.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
version 1.0
workflow liftover_vcf {
input {
File vcf_file
String chain_url
File target_fasta
String out_prefix
Int? mem_gb
}
call picard {
input: vcf_file = vcf_file,
chain_url = chain_url,
target_fasta = target_fasta,
out_prefix = out_prefix,
mem_gb = mem_gb
}
call strand_flip {
input: vcf_file = vcf_file,
rejects_file = picard.rejects_file,
out_prefix = out_prefix
}
call picard as picard2 {
input: vcf_file = strand_flip.out_file,
chain_url = chain_url,
target_fasta = target_fasta,
out_prefix = out_prefix,
mem_gb = mem_gb
}
if (picard2.num_rejects < picard.num_rejects) {
call merge_vcf {
input: vcf_files = [picard.out_file, picard2.out_file],
out_prefix = out_prefix
}
}
File lifted_file = select_first([merge_vcf.out_file, picard.out_file])
call md5 {
input: file = lifted_file
}
output {
File out_file = lifted_file
File rejects_file = picard2.rejects_file
Int num_rejects = picard2.num_rejects
String md5sum = md5.md5sum
}
meta {
author: "Stephanie Gogarten"
email: "sdmorris@uw.edu"
}
}
task picard {
input {
File vcf_file
String chain_url
File target_fasta
String out_prefix
Int mem_gb = 16
}
String chain_file = basename(chain_url)
command <<<
curl ~{chain_url} --output ~{chain_file}
java -Xmx~{mem_gb}g -jar /usr/picard/picard.jar CreateSequenceDictionary \
--REFERENCE ~{target_fasta}
java -Xmx~{mem_gb}g -jar /usr/picard/picard.jar LiftoverVcf \
--CHAIN ~{chain_file} \
--INPUT ~{vcf_file} \
--OUTPUT ~{out_prefix}.vcf.gz \
--REJECT rejected_variants.vcf.gz \
--REFERENCE_SEQUENCE ~{target_fasta} \
--RECOVER_SWAPPED_REF_ALT true \
--ALLOW_MISSING_FIELDS_IN_HEADER true \
--MAX_RECORDS_IN_RAM 10000
zcat rejected_variants.vcf.gz | grep -v "^#" | wc -l > num_rejects.txt
>>>
output {
File out_file = "~{out_prefix}.vcf.gz"
File rejects_file = "rejected_variants.vcf.gz"
Int num_rejects = read_int("num_rejects.txt")
}
runtime {
docker: "broadinstitute/picard:2.27.5"
memory: "~{mem_gb}GB"
}
}
task strand_flip {
input {
File vcf_file
File rejects_file
String out_prefix
}
command <<<
has_chr=$(zcat ~{vcf_file} | grep -F 'contig=<ID=chr' -c -m 1)
if [ "$has_chr" -gt 0 ]
then
chr_prefix='chrM'
else
chr_prefix='M'
fi
zcat ~{rejects_file} | grep -v "^#" | cut -f3 > flip.txt
plink --vcf ~{vcf_file} --double-id \
--extract flip.txt --flip flip.txt \
--output-chr $chr_prefix \
--recode vcf-iid bgz --out ~{out_prefix}_flipped
>>>
output {
File out_file = "~{out_prefix}_flipped.vcf.gz"
}
runtime {
docker: "quay.io/biocontainers/plink:1.90b6.21--hec16e2b_2"
}
}
task merge_vcf {
input {
Array[File] vcf_files
String out_prefix
}
command <<<
for f in ~{sep=' ' vcf_files}; do bcftools index $f; done
bcftools concat --allow-overlaps ~{sep=' ' vcf_files} \
-O z -o ~{out_prefix}.vcf.gz
>>>
output {
File out_file = "~{out_prefix}.vcf.gz"
}
runtime {
docker: "staphb/bcftools:1.16"
}
}
task md5 {
input {
File file
}
command <<<
md5sum ~{file} | cut -d " " -f 1 > md5.txt
>>>
output {
String md5sum = read_string("md5.txt")
}
runtime {
docker: "ubuntu:18.04"
}
}