Active region boundary parameters need to be bigger when running in GGA mode. CGL performance is quite a bit better as a result.

-- The troule stems from the fact that we may be trying to genotype indels even though it appears there are only SNPs in the reads.
This commit is contained in:
Ryan Poplin 2013-05-20 13:45:01 -04:00
parent b239cb76d4
commit 507853c583
3 changed files with 11 additions and 6 deletions

View File

@ -418,7 +418,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
private final static int PADDING_AROUND_OTHERS_FOR_CALLING = 150;
// the maximum extent into the full active region extension that we're willing to go in genotyping our events
private final static int MAX_GENOTYPING_ACTIVE_REGION_EXTENSION = 25;
private final static int MAX_DISCOVERY_ACTIVE_REGION_EXTENSION = 25;
private final static int MAX_GGA_ACTIVE_REGION_EXTENSION = 100;
private ActiveRegionTrimmer trimmer = null;
@ -549,7 +550,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
haplotypeBAMWriter = HaplotypeBAMWriter.create(bamWriterType, bamWriter, getToolkit().getSAMFileHeader());
trimmer = new ActiveRegionTrimmer(DEBUG, PADDING_AROUND_SNPS_FOR_CALLING, PADDING_AROUND_OTHERS_FOR_CALLING,
MAX_GENOTYPING_ACTIVE_REGION_EXTENSION, getToolkit().getGenomeLocParser());
UAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ? MAX_GGA_ACTIVE_REGION_EXTENSION : MAX_DISCOVERY_ACTIVE_REGION_EXTENSION,
getToolkit().getGenomeLocParser());
}
//---------------------------------------------------------------------------------------------------------------
@ -751,7 +753,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype );
if ( ! dontTrimActiveRegions ) {
return trimActiveRegion(activeRegion, haplotypes, fullReferenceWithPadding, paddedReferenceLoc);
return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc);
} else {
// we don't want to trim active regions, so go ahead and use the old one
return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
@ -763,6 +765,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
*
* @param originalActiveRegion our full active region
* @param haplotypes the list of haplotypes we've created from assembly
* @param activeAllelesToGenotype additional alleles we might need to genotype (can be empty)
* @param fullReferenceWithPadding the reference bases over the full padded location
* @param paddedReferenceLoc the span of the reference bases
* @return an AssemblyResult containing the trimmed active region with all of the reads we should use
@ -771,12 +774,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
*/
private AssemblyResult trimActiveRegion(final ActiveRegion originalActiveRegion,
final List<Haplotype> haplotypes,
final List<VariantContext> activeAllelesToGenotype,
final byte[] fullReferenceWithPadding,
final GenomeLoc paddedReferenceLoc) {
if ( DEBUG ) logger.info("Trimming active region " + originalActiveRegion + " with " + haplotypes.size() + " haplotypes");
EventMap.buildEventMapsForHaplotypes(haplotypes, fullReferenceWithPadding, paddedReferenceLoc, DEBUG);
final TreeSet<VariantContext> allVariantsWithinFullActiveRegion = EventMap.getAllVariantContexts(haplotypes);
allVariantsWithinFullActiveRegion.addAll(activeAllelesToGenotype);
final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion);
if ( trimmedActiveRegion == null ) {

View File

@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"90cbcc7e959eb591fb7c5e12d65e0e40");
"008029ee34e1becd8312e3c4d608033c");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"50894abb9d156bf480881cb5cb2a8a7d");
"ae8d95ffe77515cc74a55c2afd142826");
}
}

View File

@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"dbbc884a975587d8e7255ce47b58f438");
"bb30d0761dc9e2dfd57bfe07b72d06d8");
}
@Test