-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbcftools_merge.wdl
119 lines (94 loc) · 2.74 KB
/
bcftools_merge.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
version 1.0
# Merge several groups of VCF files with bcftools.
#
# Inputs:
#   vcf_files        One inner array per merge job; each inner array lists the
#                    VCF files that are merged together.
#   output_prefixes  One output prefix per inner array of vcf_files (the two
#                    arrays are zipped, so they must have the same length).
#   missing_to_ref   Forwarded to merge_vcfs; adds --missing-to-ref when true.
#   merge_options    Optional extra command-line options forwarded to
#                    `bcftools merge`.
#   mem_gb           Memory (GB) requested for each merge task.
#
# Outputs:
#   out_files        One merged file per group, in the order of the inputs.
#   out_index_files  The index written alongside each merged file.
workflow bcftools_merge {
    input {
        Array[Array[File]] vcf_files
        Array[String] output_prefixes
        Boolean missing_to_ref = false
        String? merge_options
        Int mem_gb = 16
    }

    # One iteration per (group of VCFs, output prefix) pair.
    scatter (group in zip(vcf_files, output_prefixes)) {

        # Index every VCF of the group in its own task call.
        scatter (single_vcf in group.left) {
            call create_index_file {
                input:
                    output_prefix = "index",
                    vcf_file = single_vcf
            }
        }

        # Inside the outer scatter, create_index_file.index_file is the
        # Array[File] gathered from the inner scatter, in the same order as
        # group.left.
        call merge_vcfs {
            input:
                vcf_files = group.left,
                index_files = create_index_file.index_file,
                out_prefix = group.right,
                missing_to_ref = missing_to_ref,
                merge_options = merge_options,
                mem_gb = mem_gb
        }
    }

    output {
        Array[File] out_files = merge_vcfs.out_file
        Array[File] out_index_files = merge_vcfs.out_index_file
    }

    meta {
        author: "Adrienne Stilp"
        email: "amstilp@uw.edu"
    }
}
# Build an index for a single VCF file with `bcftools index`.
#
# Inputs:
#   vcf_file       The VCF file to index.
#   output_prefix  Prefix of the index file name; the index is written to
#                  "<output_prefix>.csi" in the task's working directory.
#
# Outputs:
#   index_file     The "<output_prefix>.csi" file produced by bcftools.
task create_index_file {
    input {
        File vcf_file
        String? output_prefix = "index"
    }

    # Twice the input size plus 2 GB of headroom.
    Int disk_size = ceil(2 * size(vcf_file, "GB")) + 2

    command <<<
        set -e -o pipefail

        # Log the localized input path (the placeholder was previously
        # written as `{~vcf_file}`, which printed that text literally).
        echo "~{vcf_file}"

        # Options are given before the positional argument so the call does
        # not depend on the option parser permuting arguments.
        bcftools index \
            -o "~{output_prefix}.csi" \
            "~{vcf_file}"
    >>>

    output {
        File index_file = "~{output_prefix}.csi"
    }

    runtime {
        docker: "nanozoo/bcftools:1.19--1dccf69"
        disks: "local-disk " + disk_size + " SSD"
    }
}
# Merge a set of VCF files into one file with `bcftools merge`.
#
# Inputs:
#   vcf_files       The VCF files to merge.
#   index_files     One index per VCF, in the same order as vcf_files.
#   merge_options   Optional extra options inserted verbatim (unquoted, so
#                   several options may be given) into the command line.
#   out_prefix      Prefix of the output; the merged file is written to
#                   "<out_prefix>.vcf.gz".
#   mem_gb          Memory (GB) requested for the task.
#   missing_to_ref  When true, --missing-to-ref is passed to bcftools merge.
#
# Outputs:
#   out_file        "<out_prefix>.vcf.gz"
#   out_index_file  "<out_prefix>.vcf.gz.csi", produced by --write-index.
task merge_vcfs {
    input {
        Array[File] vcf_files
        Array[File] index_files
        String? merge_options
        String out_prefix
        Int mem_gb = 16
        Boolean missing_to_ref = false
    }

    # Three times the total input size plus 10 GB of headroom.
    Int disk_size = ceil(3*(size(vcf_files, "GB"))) + 10

    command <<<
        set -e -o pipefail

        echo "writing input file"
        VCF_ARRAY=(~{sep=" " vcf_files}) # Load array into bash variable
        INDEX_ARRAY=(~{sep=" " index_files}) # Load array into bash variable

        # The two arrays are paired by position below; a length mismatch
        # would silently write lines with an empty index path, so fail
        # early with a clear message instead.
        if [ "${#VCF_ARRAY[@]}" -ne "${#INDEX_ARRAY[@]}" ]; then
            echo "ERROR: got ${#VCF_ARRAY[@]} VCF files but ${#INDEX_ARRAY[@]} index files" >&2
            exit 1
        fi

        # One line per input in the form <vcf>##idx##<index>, which lets
        # the index live at a different path than the VCF itself.
        for idx in ${!VCF_ARRAY[*]}
        do
            echo "${VCF_ARRAY[$idx]}##idx##${INDEX_ARRAY[$idx]}"
        done > files.txt

        echo "printing files to merge"
        cat files.txt

        echo "Merging files..."
        # Merge files.
        bcftools merge \
            -l files.txt \
            ~{if missing_to_ref then "--missing-to-ref" else ""} \
            ~{if defined(merge_options) then merge_options else ""} \
            -o "~{out_prefix}.vcf.gz" \
            --write-index
    >>>

    output {
        File out_file = "~{out_prefix}.vcf.gz"
        File out_index_file = "~{out_prefix}.vcf.gz.csi"
    }

    runtime {
        docker: "nanozoo/bcftools:1.19--1dccf69"
        disks: "local-disk " + disk_size + " SSD"
        memory: mem_gb + " GB"
    }
}