HaplotypeCaller mode to skip assembly and genotyping for performance testing

-- Added HCPerformance evaluation Qscript
-- Added some docs about one of the HC integration tests
-- HaplotypeCaller / ART performance evaluation script
This commit is contained in:
Mark DePristo 2013-01-07 13:40:06 -05:00
parent 0ac4352614
commit b53286cc3c
2 changed files with 14 additions and 0 deletions

View File

@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -129,6 +130,7 @@ import java.util.*;
@PartitionBy(PartitionType.LOCUS)
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@ActiveRegionExtension(extension=65, maxRegion=300)
//@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@ -175,6 +177,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false)
protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
@Hidden
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
protected boolean justDetermineActiveRegions = false;
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves.
@ -403,6 +409,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Override
public Integer map( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) {
if ( justDetermineActiveRegions )
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
return 1;
final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();

View File

@ -115,6 +115,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
}
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
// any of the calls in that region because it is so messy. The only thing I would maybe be worried about is
// that the three calls that are missing happen to all be the left most calls in the region
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";