diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 96f327631..992a411ea 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -129,6 +130,7 @@ import java.util.*; @PartitionBy(PartitionType.LOCUS) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @ActiveRegionExtension(extension=65, maxRegion=300) +//@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** @@ -175,6 +177,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false) protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; + @Hidden + @Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. 
Useful for benchmarking and scalability testing", required=false) + protected boolean justDetermineActiveRegions = false; + /** * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * dbSNP is not used in any way for the calculations themselves. @@ -403,6 +409,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Override public Integer map( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) { + if ( justDetermineActiveRegions ) + // we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work + return 1; final ArrayList activeAllelesToGenotype = new ArrayList(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 142fa39bf..060fda75a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -115,6 +115,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735"); } + // That problem bam came from a user on the forum and it spotted a problem where the ReadClipper + // was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to + // map call. So the test is there for consistency but not for correctness. I'm not sure we can trust + // any of the calls in that region because it is so messy. 
The only thing that might be worth worrying about is + // that the three missing calls all happen to be the leftmost calls in the region. @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";