Merge pull request #751 from broadinstitute/pd_bamout_ref_regions

Added -disableOptimizations argument to HaplotypeCaller.
This commit is contained in:
rpoplin 2014-10-21 11:50:46 -04:00
commit 312a833db3
4 changed files with 21 additions and 8 deletions

View File

@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller; package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument; import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Hidden; import org.broadinstitute.gatk.utils.commandline.Hidden;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
@ -94,7 +95,7 @@ class ActiveRegionTrimmer {
*/ */
private boolean emitReferenceConfidence; private boolean emitReferenceConfidence;
@Hidden @Advanced
@Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false) @Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false)
protected boolean dontTrimActiveRegions = false; protected boolean dontTrimActiveRegions = false;

View File

@ -292,6 +292,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false) @Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false)
public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES; public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES;
/**
* If set, certain "early exit" optimizations in HaplotypeCaller will be disabled. This is most likely to be useful if
* you're using the -bamout argument to examine the placement of reads following reassembly and are interested in the
* reads in regions with no variations. The -forceActive and -dontTrimActiveRegions arguments may also be necessary.
*/
@Advanced
@Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Should HaplotypeCaller skip expensive calculations in active regions with no variants?",
required = false)
private boolean disableOptimizations = false;
/** /**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves. * dbSNP is not used in any way for the calculations themselves.
@ -962,7 +972,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final ActiveRegionTrimmer.Result trimmingResult = trimmer.trim(originalActiveRegion,allVariationEvents); final ActiveRegionTrimmer.Result trimmingResult = trimmer.trim(originalActiveRegion,allVariationEvents);
if (!trimmingResult.isVariationPresent()) if (!trimmingResult.isVariationPresent() && !disableOptimizations)
return referenceModelForNoVariation(originalActiveRegion,false); return referenceModelForNoVariation(originalActiveRegion,false);
final AssemblyResultSet assemblyResult = final AssemblyResultSet assemblyResult =
@ -980,7 +990,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// abort early if something is out of the acceptable range // abort early if something is out of the acceptable range
// TODO is this ever true at this point??? perhaps GGA. Need to check. // TODO is this ever true at this point??? perhaps GGA. Need to check.
if( ! assemblyResult.isVariationPresent() ) if( ! assemblyResult.isVariationPresent() && ! disableOptimizations)
return referenceModelForNoVariation(originalActiveRegion, false); return referenceModelForNoVariation(originalActiveRegion, false);
// For sure this is not true if gVCF is on. // For sure this is not true if gVCF is on.
@ -988,7 +998,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// TODO is this ever true at this point??? perhaps GGA. Need to check. // TODO is this ever true at this point??? perhaps GGA. Need to check.
if( regionForGenotyping.size() == 0 ) { if( regionForGenotyping.size() == 0 && ! disableOptimizations) {
// no reads remain after filtering so nothing else to do! // no reads remain after filtering so nothing else to do!
return referenceModelForNoVariation(originalActiveRegion, false); return referenceModelForNoVariation(originalActiveRegion, false);
} }
@ -1024,13 +1034,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
(consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles), (consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles),
emitReferenceConfidence()); emitReferenceConfidence());
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) { if ( bamWriter != null ) {
final Set<Haplotype> calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes());
if (disableOptimizations)
calledHaplotypeSet.add(assemblyResult.getReferenceHaplotype());
haplotypeBAMWriter.writeReadsAlignedToHaplotypes( haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
haplotypes, haplotypes,
assemblyResult.getPaddedReferenceLoc(), assemblyResult.getPaddedReferenceLoc(),
haplotypes, haplotypes,
calledHaplotypes.getCalledHaplotypes(), calledHaplotypeSet,
readLikelihoods); readLikelihoods);
} }

View File

@ -100,6 +100,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878", tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5}); ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals + " -disableOptimizations", NA12878bandedResolutionMD5});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }

View File

@ -353,8 +353,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test @Test
public void testLackSensitivityDueToBadHaplotypeSelectionFix() { public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list", b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);