From 2a9ee89c190da2301ec6b5dce5c41f9ca845a603 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 10 Oct 2012 10:47:26 -0400 Subject: [PATCH] Turning on allele trimming for the haplotype caller. --- .../haplotypecaller/GenotypingEngine.java | 17 ++++++++++++++--- .../LikelihoodCalculationEngine.java | 11 ++++++----- .../HaplotypeCallerIntegrationTest.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 4 ++-- .../variantcontext/VariantContextUtils.java | 3 --- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 192befe67..8738def50 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -283,7 +283,7 @@ public class GenotypingEngine { final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } - final HashMap> alleleHashMap = new HashMap>(); + HashMap> alleleHashMap = new HashMap>(); int aCount = 0; for( final Allele a : mergedVC.getAlleles() ) { alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper @@ -308,9 +308,20 @@ public class GenotypingEngine { } genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); } - final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel); - + VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel); if( call != null ) { + if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! + final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call); + // also, need to update the allele -> haplotype mapping + final HashMap> alleleHashMapTrim = new HashMap>(); + for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC + alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleHashMap.get(call.getAlleles().get(iii))); + } + + call = vcCallTrim; + alleleHashMap = alleleHashMapTrim; + } + returnCalls.add( new Pair>>(call, alleleHashMap) ); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index db289ecab..072f81db9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -40,7 +40,6 @@ import java.util.*; public class LikelihoodCalculationEngine { private static final double LOG_ONE_HALF = -Math.log10(2.0); - private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1; private final byte constantGCP; private final boolean DEBUG; private final PairHMM pairHMM; @@ -184,7 +183,7 @@ public class LikelihoodCalculationEngine { haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF ); } } - haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum? + haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); } } } @@ -323,11 +322,13 @@ public class LikelihoodCalculationEngine { return bestHaplotypes; } - public static Map partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap> perSampleReadList, final HashMap> perSampleFilteredReadList, final Pair>> call) { + public static Map partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, + final HashMap> perSampleReadList, + final HashMap> perSampleFilteredReadList, + final Pair>> call) { final Map returnMap = new HashMap(); final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst()); for( final Map.Entry> sample : perSampleReadList.entrySet() ) { - //final Map> alleleReadMap = new HashMap>(); final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); final ArrayList readsForThisSample = sample.getValue(); @@ -352,7 +353,7 @@ public class LikelihoodCalculationEngine { // only count the read if it overlaps the event, otherwise it is not added to the output read list at all if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { for( final Allele a : call.getFirst().getAlleles() ) - likelihoodMap.add(read,a,0.0); + likelihoodMap.add(read, a, 0.0); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 713bfb317..e94c9705c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "f5a809e3fbd9998f79b75bb2973209e1"); + HCTestComplexVariants(CEUTRIO_BAM, "", "966da0de8466d21d79f1523488dff6bd"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 609d2d731..aeb8b9dd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -508,10 +508,10 @@ public class UnifiedGenotyperEngine { // if we are subsetting alleles (either because there were too many or because some were not polymorphic) // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). - if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) + if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); - if ( annotationEngine != null && !limitedContext ) { + if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations final ReadBackedPileup pileup = rawContext.getBasePileup(); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 6ae81f76f..81959c998 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1340,10 +1340,7 @@ public class VariantContextUtils { public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { - // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed - // see whether we need to trim common reference base from all alleles - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) return inputVC;