Added previous optimization to diploid (non-pool) model and shaved off 20% of runtime from it. Moved out some common functionality to joint estimate parent class.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2453 348d0f76-0448-11de-a6fe-93d51630548a
2009-12-28 21:20:48 +00:00 · 2009-12-28 21:20:48 +00:00 · 893c9c85fa
parent 92e3682991
commit 893c9c85fa
3 changed files with 25 additions and 4 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java
@ -61,6 +61,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul

        // first, calculate for AF=0 (no change to matrix)
        log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency();
+        double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0];

        // for each minor allele frequency, calculate log10PofDgivenAFi
        for (int i = 1; i <= numFrequencies; i++) {
@ -69,6 +70,16 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul

            // calculate new likelihoods
            log10PofDgivenAFi[baseIndex][i] = matrix.getLikelihoodsOfFrequency();
+
+            // an optimization to speed up the calculation: if we are beyond the local maximum such
+            //  that subsequent likelihoods won't factor into the confidence score, just quit
+            if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) {
+                ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex);
+                return;
+            }
+
+            if ( log10PofDgivenAFi[baseIndex][i] > maxLikelihoodSeen )
+                maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][i];
        }
    }

--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java
@ -13,6 +13,10 @@ import java.io.PrintWriter;

 public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel {

+    // for use in optimizing the P(D|AF) calculations:
+    // how much off from the max likelihoods do we need to be before we can quit calculating?
+    protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
+
    // because the null allele frequencies are constant for a given N,
    // we cache the results to avoid having to recompute everything
    private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
@ -205,6 +209,15 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc

    /********************************************************************************/

+    /**
+     * @param freqI             allele frequency I
+     * @param numFrequencies    total number of allele frequencies
+     * @param altBaseIndex      the index of the alternate allele
+     */
+    protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) {
+        while ( ++freqI <= numFrequencies )
+            log10PofDgivenAFi[altBaseIndex][freqI] = -1.0 * Double.MAX_VALUE;
+    }

    /**
     * @param ref                         the ref base
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java
@ -13,8 +13,6 @@ import net.sf.samtools.SAMRecord;
 public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
    protected static final String POOL_SAMPLE_NAME = "POOL";

-    private static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
-
    private static FourBaseProbabilities fourBaseLikelihoods = null;
    private static boolean USE_CACHE = true;

@ -75,8 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
            // an optimization to speed up the calculation: if we are beyond the local maximum such
            //  that subsequent likelihoods won't factor into the confidence score, just quit
            if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) {
-                while ( ++frequency <= nChromosomes )
-                    log10PofDgivenAFi[altIndex][frequency] = -1.0 * Double.MAX_VALUE;
+                ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex);
                return;
            }