diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java index 11f76139e..3fa393eb0 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ActiveRegionTrimmer.java @@ -52,6 +52,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; import org.apache.log4j.Logger; +import org.broadinstitute.gatk.utils.commandline.Advanced; import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Hidden; import org.broadinstitute.gatk.utils.GenomeLoc; @@ -94,7 +95,7 @@ class ActiveRegionTrimmer { */ private boolean emitReferenceConfidence; - @Hidden + @Advanced @Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false) protected boolean dontTrimActiveRegions = false; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 586a8acaa..0ac337038 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -292,6 +292,16 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false) public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES; + /** + * If set, certain "early exit" optimizations in HaplotypeCaller will be disabled. This is most likely to be useful if + * you're using the -bamout argument to examine the placement of reads following reassembly and are interested in the + * reads in regions with no variations. The -forceActive and -dontTrimActiveRegions arguments may also be necessary. + */ + @Advanced + @Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Should HaplotypeCaller skip expensive calculations in active regions with no variants?", + required = false) + private boolean disableOptimizations = false; + /** * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * dbSNP is not used in any way for the calculations themselves. @@ -962,7 +972,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final ActiveRegionTrimmer.Result trimmingResult = trimmer.trim(originalActiveRegion,allVariationEvents); - if (!trimmingResult.isVariationPresent()) + if (!trimmingResult.isVariationPresent() && !disableOptimizations) return referenceModelForNoVariation(originalActiveRegion,false); final AssemblyResultSet assemblyResult = @@ -980,7 +990,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // abort early if something is out of the acceptable range // TODO is this ever true at this point??? perhaps GGA. Need to check. - if( ! assemblyResult.isVariationPresent() ) + if( ! assemblyResult.isVariationPresent() && ! disableOptimizations) return referenceModelForNoVariation(originalActiveRegion, false); // For sure this is not true if gVCF is on. @@ -988,7 +998,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // TODO is this ever true at this point??? perhaps GGA. Need to check. - if( regionForGenotyping.size() == 0 ) { + if( regionForGenotyping.size() == 0 && ! disableOptimizations) { // no reads remain after filtering so nothing else to do! return referenceModelForNoVariation(originalActiveRegion, false); } @@ -1024,13 +1034,15 @@ public class HaplotypeCaller extends ActiveRegionWalker, In (consensusMode ? Collections.emptyList() : givenAlleles), emitReferenceConfidence()); - // TODO -- must disable if we are doing NCT, or set the output type of ! presorted if ( bamWriter != null ) { + final Set calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes()); + if (disableOptimizations) + calledHaplotypeSet.add(assemblyResult.getReferenceHaplotype()); haplotypeBAMWriter.writeReadsAlignedToHaplotypes( haplotypes, assemblyResult.getPaddedReferenceLoc(), haplotypes, - calledHaplotypes.getCalledHaplotypes(), + calledHaplotypeSet, readLikelihoods); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 49719636e..d00444202 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -100,6 +100,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878", ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals + " -disableOptimizations", NA12878bandedResolutionMD5}); return tests.toArray(new Object[][]{}); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 3e2b7eb56..14dd1cf76 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -353,8 +353,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testLackSensitivityDueToBadHaplotypeSelectionFix() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", - b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list", - HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list"); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c")); spec.disableShadowBCF(); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);