We no longer subset down to the best N haplotypes for the GL calculation.

I explain in comments within the code that this was causing problems with the marginalization over events.
2013-06-04 09:26:50 -04:00 · 2013-06-04 09:26:50 -04:00 · c0030f3f2d
parent c0e3874db0
commit c0030f3f2d
2 changed files with 8 additions and 20 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -694,11 +694,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        //logger.info("Computing read likelihoods with " + assemblyResult.regionForGenotyping.size() + " reads");
        final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( assemblyResult.haplotypes, splitReadsBySample( assemblyResult.regionForGenotyping.getReads() ) );

-        // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
-        final List<Haplotype> bestHaplotypes = selectBestHaplotypesForGenotyping(assemblyResult.haplotypes, stratifiedReadMap);
+        // Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
+        //  was a bad interaction between that selection and the marginalization that happens over each event when computing
+        //  GLs.  In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the
+        //  haplotype containing C as reference (and vice versa).  Now this is fine if all possible haplotypes are included
+        //  in the genotyping, but we lose information if we select down to a few haplotypes.  [EB]

        final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
-                bestHaplotypes,
+                assemblyResult.haplotypes,
                stratifiedReadMap,
                perSampleFilteredReadList,
                assemblyResult.fullReferenceWithPadding,
@ -711,7 +714,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        // TODO -- must disable if we are doing NCT, or set the output type of ! presorted
        if ( bamWriter != null ) {
            haplotypeBAMWriter.writeReadsAlignedToHaplotypes(assemblyResult.haplotypes, assemblyResult.paddedReferenceLoc,
-                    bestHaplotypes,
+                    assemblyResult.haplotypes,
                    calledHaplotypes.getCalledHaplotypes(),
                    stratifiedReadMap);
        }
@ -863,21 +866,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
    }

-    /**
-     * Select the best N haplotypes according to their likelihoods, if appropriate
-     *
-     * @param haplotypes a list of haplotypes to consider
-     * @param stratifiedReadMap a map from samples -> read likelihoods
-     * @return the list of haplotypes to genotype
-     */
-    protected List<Haplotype> selectBestHaplotypesForGenotyping(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
-        if ( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
-            return haplotypes;
-        } else {
-            return likelihoodCalculationEngine.selectBestHaplotypesFromEachSample(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation);
-        }
-    }
-
    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
@ -215,7 +215,7 @@ public abstract class LocalAssemblyEngine {
                    returnHaplotypes.add(h);

                    if ( debug )
-                        logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize());
+                        logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
                }
            }
        }