HaplotypeCaller mode to skip assembly and genotyping for performance testing
-- Added HCPerformance evaluation Qscript -- Added some docs about one of the HC integration tests -- HaplotypeCaller / ART performance evaluation script
This commit is contained in:
parent
0ac4352614
commit
b53286cc3c
|
|
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -129,6 +130,7 @@ import java.util.*;
|
|||
@PartitionBy(PartitionType.LOCUS)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
//@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
/**
|
||||
|
|
@ -175,6 +177,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false)
|
||||
protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
|
||||
protected boolean justDetermineActiveRegions = false;
|
||||
|
||||
/**
|
||||
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
||||
* dbSNP is not used in any way for the calculations themselves.
|
||||
|
|
@ -403,6 +409,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
|
||||
@Override
|
||||
public Integer map( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) {
|
||||
if ( justDetermineActiveRegions )
|
||||
// we're benchmarking ART and/or the active region determination code in the HC, so just return without doing any work
|
||||
return 1;
|
||||
|
||||
final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
|
||||
|
||||
|
|
|
|||
|
|
@ -115,6 +115,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
|
||||
}
|
||||
|
||||
// This problematic BAM came from a user on the forum; it exposed a bug where the ReadClipper
|
||||
// was modifying the GATKSamRecord and that was screwing up the traversal engine from map call to
|
||||
// map call. So the test is there for consistency but not for correctness. I'm not sure we can trust
|
||||
// any of the calls in that region because it is so messy. The only thing I would maybe be worried about is
|
||||
// that the three calls that are missing happen to all be the leftmost calls in the region.
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
|
|
|
|||
Loading…
Reference in New Issue