Turning on allele trimming for the haplotype caller.

2012-10-10 10:47:26 -04:00 · 2012-10-10 10:47:26 -04:00 · 2a9ee89c19
parent b543bddbb7
commit 2a9ee89c19
5 changed files with 23 additions and 14 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@ -283,7 +283,7 @@ public class GenotypingEngine {
                final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
                if( mergedVC == null ) { continue; }

-                final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
+                HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
                int aCount = 0;
                for( final Allele a : mergedVC.getAlleles() ) {
                    alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
@ -308,9 +308,20 @@ public class GenotypingEngine {
                    }
                    genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
                }
-                final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
-
+                VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
                if( call != null ) {
+                    if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
+                        final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call);
+                        // also, need to update the allele -> haplotype mapping
+                        final HashMap<Allele, ArrayList<Haplotype>> alleleHashMapTrim = new HashMap<Allele, ArrayList<Haplotype>>();
+                        for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC
+                            alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleHashMap.get(call.getAlleles().get(iii)));
+                        }
+
+                        call = vcCallTrim;
+                        alleleHashMap = alleleHashMapTrim;
+                    }
+
                    returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
                }
            }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@ -40,7 +40,6 @@ import java.util.*;
 public class LikelihoodCalculationEngine {

    private static final double LOG_ONE_HALF = -Math.log10(2.0);
-    private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1;
    private final byte constantGCP;
    private final boolean DEBUG;
    private final PairHMM pairHMM;
@ -184,7 +183,7 @@ public class LikelihoodCalculationEngine {
                                haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
                            }
                        }
-                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
+                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
                    }
                }       
            }
@ -323,11 +322,13 @@ public class LikelihoodCalculationEngine {
        return bestHaplotypes;
    }

-    public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
+    public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
+                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
+                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
+                                                                                            final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
        final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
        final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
-            //final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
            final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();

            final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
@ -352,7 +353,7 @@ public class LikelihoodCalculationEngine {
                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
                    for( final Allele a : call.getFirst().getAlleles() )
-                        likelihoodMap.add(read,a,0.0);
+                        likelihoodMap.add(read, a, 0.0);
                }
            }

--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerMultiSampleComplex() {
-        HCTestComplexVariants(CEUTRIO_BAM, "", "f5a809e3fbd9998f79b75bb2973209e1");
+        HCTestComplexVariants(CEUTRIO_BAM, "", "966da0de8466d21d79f1523488dff6bd");
    }

    private void HCTestSymbolicVariants(String bam, String args, String md5) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -508,10 +508,10 @@ public class UnifiedGenotyperEngine {

        // if we are subsetting alleles (either because there were too many or because some were not polymorphic)
        // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
-        if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
+        if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
            vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);

-        if ( annotationEngine != null && !limitedContext ) {
+        if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
            // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
            final ReadBackedPileup pileup = rawContext.getBasePileup();
            stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@ -1340,10 +1340,7 @@ public class VariantContextUtils {

    public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {

-        // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
-
        // see whether we need to trim common reference base from all alleles
-
        final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
        if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
            return inputVC;