Skip to content

Commit ae4c938

Browse files
author
bshifaw
authored
HC annot fix (#54)
* Added bamout variable, added GQB annotation, removed AS_StandardAnnotation for VCF mode * Removed disk size suggestion in README * Minor edit to HC task to set vcf_basename from within the task
1 parent 1654241 commit ae4c938

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@ it easier to configure the workflow.*
3939
#### Requirements/expectations
4040
- One or more GVCFs produced by HaplotypeCaller in GVCF mode
4141
- Bare minimum 50 samples. Gene panels are not supported.
42-
- When determining disk size in the JSON, use the guideline below
43-
- small_disk = (num_gvcfs / 10) + 10
44-
- medium_disk = (num_gvcfs * 15) + 10
45-
- huge_disk = num_gvcfs + 10
4642

4743
### Outputs
4844
- A VCF file and its index, filtered using variant quality score recalibration

haplotypecaller-gvcf-gatk4.wdl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ workflow HaplotypeCallerGvcf_GATK4 {
3939
File scattered_calling_intervals_list
4040

4141
Boolean make_gvcf = true
42-
String gatk_docker = "broadinstitute/gatk:4.1.4.0"
42+
Boolean make_bamout = false
43+
String gatk_docker = "broadinstitute/gatk:4.1.7.0"
4344
String gatk_path = "/gatk/gatk"
4445
String gitc_docker = "broadinstitute/genomes-in-the-cloud:2.3.1-1500064817"
4546
String samtools_path = "samtools"
@@ -89,6 +90,7 @@ workflow HaplotypeCallerGvcf_GATK4 {
8990
ref_fasta_index = ref_fasta_index,
9091
hc_scatter = hc_divisor,
9192
make_gvcf = make_gvcf,
93+
make_bamout = make_bamout,
9294
docker = gatk_docker,
9395
gatk_path = gatk_path
9496
}
@@ -168,6 +170,7 @@ task HaplotypeCaller {
168170
File ref_fasta_index
169171
Float? contamination
170172
Boolean make_gvcf
173+
Boolean make_bamout
171174
Int hc_scatter
172175

173176
String gatk_path
@@ -188,7 +191,10 @@ task HaplotypeCaller {
188191

189192
Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB") + size(ref_dict, "GB")
190193
Int disk_size = ceil(((size(input_bam, "GB") + 30) / hc_scatter) + ref_size) + 20
191-
194+
195+
String vcf_basename = if make_gvcf then basename(output_filename, ".gvcf") else basename(output_filename, ".vcf")
196+
String bamout_arg = if make_bamout then "-bamout ~{vcf_basename}.bamout.bam" else ""
197+
192198
parameter_meta {
193199
input_bam: {
194200
description: "a bam file",
@@ -208,8 +214,14 @@ task HaplotypeCaller {
208214
-I ~{input_bam} \
209215
-L ~{interval_list} \
210216
-O ~{output_filename} \
211-
-contamination ~{default=0 contamination} ~{true="-ERC GVCF" false="" make_gvcf} \
212-
-G StandardAnnotation -G AS_StandardAnnotation -G StandardHCAnnotation
217+
-contamination ~{default=0 contamination} \
218+
-G StandardAnnotation -G StandardHCAnnotation ~{true="-G AS_StandardAnnotation" false="" make_gvcf} \
219+
-GQB 10 -GQB 20 -GQB 30 -GQB 40 -GQB 50 -GQB 60 -GQB 70 -GQB 80 -GQB 90 \
220+
~{true="-ERC GVCF" false="" make_gvcf} \
221+
~{bamout_arg}
222+
223+
# Cromwell doesn't like optional task outputs, so we always touch the bamout file; it is left empty when make_bamout is false.
224+
touch ~{vcf_basename}.bamout.bam
213225
}
214226
runtime {
215227
docker: docker
@@ -220,6 +232,7 @@ task HaplotypeCaller {
220232
output {
221233
File output_vcf = "~{output_filename}"
222234
File output_vcf_index = "~{output_filename}.tbi"
235+
File bamout = "~{vcf_basename}.bamout.bam"
223236
}
224237
}
225238
# Merge GVCFs generated per-interval for the same sample

0 commit comments

Comments
 (0)