From b348ce8f25b921eb6c180c4f639094d1787a5d7a Mon Sep 17 00:00:00 2001 From: Phillip Dexheimer Date: Wed, 10 Sep 2014 22:16:12 -0400 Subject: [PATCH] Added -disableOptimizations argument to HaplotypeCaller. * This argument is intended to be used in conjunction with -bamout, and disable early-exit optimizations to allow reference regions to be contained in the output bam * Also forcibly includes the reference haplotype in the set of haplotypes given to the BAMWriter * Made -dontTrimActiveRegions visible, as it is likely also desirable in this use case * Addresses PT 77731660 --- .../haplotypecaller/ActiveRegionTrimmer.java | 3 ++- .../haplotypecaller/HaplotypeCaller.java | 22 ++++++++++++++----- .../HaplotypeCallerGVCFIntegrationTest.java | 1 + .../HaplotypeCallerIntegrationTest.java | 3 +-- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java index 11f76139e..3fa393eb0 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java @@ -52,6 +52,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; import org.apache.log4j.Logger; +import org.broadinstitute.gatk.utils.commandline.Advanced; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Hidden; import org.broadinstitute.gatk.utils.GenomeLoc; @@ -94,7 +95,7 @@ class ActiveRegionTrimmer { */ private boolean emitReferenceConfidence; - @Hidden + @Advanced @Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false) protected boolean dontTrimActiveRegions = false; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 586a8acaa..0ac337038 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -292,6 +292,16 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false) public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES; + /** + * If set, certain "early exit" optimizations in HaplotypeCaller will be disabled. This is most likely to be useful if + * you're using the -bamout argument to examine the placement of reads following reassembly and are interested in the + * reads in regions with no variations. The -forceActive and -dontTrimActiveRegions arguments may also be necessary. + */ + @Advanced + @Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Should HaplotypeCaller skip expensive calculations in active regions with no variants?", + required = false) + private boolean disableOptimizations = false; + /** * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * dbSNP is not used in any way for the calculations themselves. @@ -962,7 +972,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final ActiveRegionTrimmer.Result trimmingResult = trimmer.trim(originalActiveRegion,allVariationEvents); - if (!trimmingResult.isVariationPresent()) + if (!trimmingResult.isVariationPresent() && !disableOptimizations) return referenceModelForNoVariation(originalActiveRegion,false); final AssemblyResultSet assemblyResult = @@ -980,7 +990,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // abort early if something is out of the acceptable range // TODO is this ever true at this point??? perhaps GGA. Need to check. - if( ! assemblyResult.isVariationPresent() ) + if( ! assemblyResult.isVariationPresent() && ! disableOptimizations) return referenceModelForNoVariation(originalActiveRegion, false); // For sure this is not true if gVCF is on. @@ -988,7 +998,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // TODO is this ever true at this point??? perhaps GGA. Need to check. - if( regionForGenotyping.size() == 0 ) { + if( regionForGenotyping.size() == 0 && ! disableOptimizations) { // no reads remain after filtering so nothing else to do! return referenceModelForNoVariation(originalActiveRegion, false); } @@ -1024,13 +1034,15 @@ public class HaplotypeCaller extends ActiveRegionWalker, In (consensusMode ? Collections.emptyList() : givenAlleles), emitReferenceConfidence()); - // TODO -- must disable if we are doing NCT, or set the output type of ! presorted if ( bamWriter != null ) { + final Set calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes()); + if (disableOptimizations) + calledHaplotypeSet.add(assemblyResult.getReferenceHaplotype()); haplotypeBAMWriter.writeReadsAlignedToHaplotypes( haplotypes, assemblyResult.getPaddedReferenceLoc(), haplotypes, - calledHaplotypes.getCalledHaplotypes(), + calledHaplotypeSet, readLikelihoods); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 49719636e..d00444202 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -100,6 +100,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878", ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals + " -disableOptimizations", NA12878bandedResolutionMD5}); return tests.toArray(new Object[][]{}); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 3e2b7eb56..14dd1cf76 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -353,8 +353,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testLackSensitivityDueToBadHaplotypeSelectionFix() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", - b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list", - HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list"); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c")); spec.disableShadowBCF(); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);