Merge pull request #751 from broadinstitute/pd_bamout_ref_regions

Added -disableOptimizations argument to HaplotypeCaller.
This commit is contained in:
rpoplin 2014-10-21 11:50:46 -04:00
commit 312a833db3
4 changed files with 21 additions and 8 deletions

View File

@ -52,6 +52,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Hidden;
import org.broadinstitute.gatk.utils.GenomeLoc;
@ -94,7 +95,7 @@ class ActiveRegionTrimmer {
*/
private boolean emitReferenceConfidence;
@Hidden
@Advanced
@Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false)
protected boolean dontTrimActiveRegions = false;

View File

@ -292,6 +292,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName="bamWriterType", shortName="bamWriterType", doc="How should haplotypes be written to the BAM?", required = false)
public HaplotypeBAMWriter.Type bamWriterType = HaplotypeBAMWriter.Type.CALLED_HAPLOTYPES;
/**
* If set, certain "early exit" optimizations in HaplotypeCaller will be disabled. This is most likely to be useful if
* you're using the -bamout argument to examine the placement of reads following reassembly and are interested in the
* reads in regions with no variations. The -forceActive and -dontTrimActiveRegions arguments may also be necessary.
*/
@Advanced
@Argument(fullName = "disableOptimizations", shortName="disableOptimizations", doc="Should HaplotypeCaller skip expensive calculations in active regions with no variants?",
required = false)
private boolean disableOptimizations = false;
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves.
@ -962,7 +972,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final ActiveRegionTrimmer.Result trimmingResult = trimmer.trim(originalActiveRegion,allVariationEvents);
if (!trimmingResult.isVariationPresent())
if (!trimmingResult.isVariationPresent() && !disableOptimizations)
return referenceModelForNoVariation(originalActiveRegion,false);
final AssemblyResultSet assemblyResult =
@ -980,7 +990,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// abort early if something is out of the acceptable range
// TODO is this ever true at this point??? perhaps GGA. Need to check.
if( ! assemblyResult.isVariationPresent() )
if( ! assemblyResult.isVariationPresent() && ! disableOptimizations)
return referenceModelForNoVariation(originalActiveRegion, false);
// For sure this is not true if gVCF is on.
@ -988,7 +998,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// TODO is this ever true at this point??? perhaps GGA. Need to check.
if( regionForGenotyping.size() == 0 ) {
if( regionForGenotyping.size() == 0 && ! disableOptimizations) {
// no reads remain after filtering so nothing else to do!
return referenceModelForNoVariation(originalActiveRegion, false);
}
@ -1024,13 +1034,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
(consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles),
emitReferenceConfidence());
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
final Set<Haplotype> calledHaplotypeSet = new HashSet<>(calledHaplotypes.getCalledHaplotypes());
if (disableOptimizations)
calledHaplotypeSet.add(assemblyResult.getReferenceHaplotype());
haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
haplotypes,
assemblyResult.getPaddedReferenceLoc(),
haplotypes,
calledHaplotypes.getCalledHaplotypes(),
calledHaplotypeSet,
readLikelihoods);
}

View File

@ -100,6 +100,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
tests.add(new Object[]{NA12878_WEx + " -I " + privateTestDir + "NA20313.highCoverageRegion.bam -sn NA12878",
ReferenceConfidenceMode.GVCF, WExIntervals, NA12878bandedResolutionMD5});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals + " -disableOptimizations", NA12878bandedResolutionMD5});
return tests.toArray(new Object[][]{});
}

View File

@ -353,8 +353,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);