Active region boundary parameters need to be bigger when running in GGA mode. CGL performance is quite a bit better as a result.
-- The troule stems from the fact that we may be trying to genotype indels even though it appears there are only SNPs in the reads.
This commit is contained in:
parent
b239cb76d4
commit
507853c583
|
|
@ -418,7 +418,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
private final static int PADDING_AROUND_OTHERS_FOR_CALLING = 150;
|
||||
|
||||
// the maximum extent into the full active region extension that we're willing to go in genotyping our events
|
||||
private final static int MAX_GENOTYPING_ACTIVE_REGION_EXTENSION = 25;
|
||||
private final static int MAX_DISCOVERY_ACTIVE_REGION_EXTENSION = 25;
|
||||
private final static int MAX_GGA_ACTIVE_REGION_EXTENSION = 100;
|
||||
|
||||
private ActiveRegionTrimmer trimmer = null;
|
||||
|
||||
|
|
@ -549,7 +550,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
haplotypeBAMWriter = HaplotypeBAMWriter.create(bamWriterType, bamWriter, getToolkit().getSAMFileHeader());
|
||||
|
||||
trimmer = new ActiveRegionTrimmer(DEBUG, PADDING_AROUND_SNPS_FOR_CALLING, PADDING_AROUND_OTHERS_FOR_CALLING,
|
||||
MAX_GENOTYPING_ACTIVE_REGION_EXTENSION, getToolkit().getGenomeLocParser());
|
||||
UAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ? MAX_GGA_ACTIVE_REGION_EXTENSION : MAX_DISCOVERY_ACTIVE_REGION_EXTENSION,
|
||||
getToolkit().getGenomeLocParser());
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -751,7 +753,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, paddedReferenceLoc, activeAllelesToGenotype );
|
||||
|
||||
if ( ! dontTrimActiveRegions ) {
|
||||
return trimActiveRegion(activeRegion, haplotypes, fullReferenceWithPadding, paddedReferenceLoc);
|
||||
return trimActiveRegion(activeRegion, haplotypes, activeAllelesToGenotype, fullReferenceWithPadding, paddedReferenceLoc);
|
||||
} else {
|
||||
// we don't want to trim active regions, so go ahead and use the old one
|
||||
return new AssemblyResult(haplotypes, activeRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
|
||||
|
|
@ -763,6 +765,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
*
|
||||
* @param originalActiveRegion our full active region
|
||||
* @param haplotypes the list of haplotypes we've created from assembly
|
||||
* @param activeAllelesToGenotype additional alleles we might need to genotype (can be empty)
|
||||
* @param fullReferenceWithPadding the reference bases over the full padded location
|
||||
* @param paddedReferenceLoc the span of the reference bases
|
||||
* @return an AssemblyResult containing the trimmed active region with all of the reads we should use
|
||||
|
|
@ -771,12 +774,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
*/
|
||||
private AssemblyResult trimActiveRegion(final ActiveRegion originalActiveRegion,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<VariantContext> activeAllelesToGenotype,
|
||||
final byte[] fullReferenceWithPadding,
|
||||
final GenomeLoc paddedReferenceLoc) {
|
||||
if ( DEBUG ) logger.info("Trimming active region " + originalActiveRegion + " with " + haplotypes.size() + " haplotypes");
|
||||
|
||||
EventMap.buildEventMapsForHaplotypes(haplotypes, fullReferenceWithPadding, paddedReferenceLoc, DEBUG);
|
||||
final TreeSet<VariantContext> allVariantsWithinFullActiveRegion = EventMap.getAllVariantContexts(haplotypes);
|
||||
allVariantsWithinFullActiveRegion.addAll(activeAllelesToGenotype);
|
||||
final ActiveRegion trimmedActiveRegion = trimmer.trimRegion(originalActiveRegion, allVariantsWithinFullActiveRegion);
|
||||
|
||||
if ( trimmedActiveRegion == null ) {
|
||||
|
|
|
|||
|
|
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"90cbcc7e959eb591fb7c5e12d65e0e40");
|
||||
"008029ee34e1becd8312e3c4d608033c");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"50894abb9d156bf480881cb5cb2a8a7d");
|
||||
"ae8d95ffe77515cc74a55c2afd142826");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"dbbc884a975587d8e7255ce47b58f438");
|
||||
"bb30d0761dc9e2dfd57bfe07b72d06d8");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue