Improvement in exact allele frequency calculation model (still under test, but this is definitely better than what I had before). Instead of approximating log(10^x+10^y) as max(x,y), approximate full Jacobian formula max(x,y)+log(1+10^-abs(x-y)) with static lookup table for the second term.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4647 348d0f76-0448-11de-a6fe-93d51630548a
2010-11-11 01:22:35 +00:00 · 2010-11-11 01:22:35 +00:00 · 2f3be24a00
parent f35d1aa43f
commit 2f3be24a00
1 changed files with 65 additions and 28 deletions
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java
@ -43,16 +43,32 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {

    private boolean DEBUGOUT = false;
    private boolean SIMPLE_GREEDY_GENOTYPER = false;
-    private double[] log10Cache;
+    private static final double[] log10Cache;
+    private static final double[] jacobianLogTable;
+    private static final int JACOBIAN_LOG_TABLE_SIZE = 101;
+    private static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
+    private static final double MAX_JACOBIAN_TOLERANCE = 10.0;
+    private static final int MAXN = 10000;
+
+    static {
+        log10Cache = new double[2*MAXN];
+	    jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
+
+        log10Cache[0] = Double.NEGATIVE_INFINITY;
+        for (int k=1; k < 2*MAXN; k++)
+            log10Cache[k] = Math.log10(k);
+
+	for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
+	    jacobianLogTable[k] = Math.log10(1.0+Math.pow(10.0,-((double)k) 
+						       * JACOBIAN_LOG_TABLE_STEP));
+
+	}
+
+    }

    protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
        super(N, logger, verboseWriter);

-        log10Cache = new double[2*N];
-
-        log10Cache[0] = Double.NEGATIVE_INFINITY;
-        for (int k=1; k < 2*N; k++)
-            log10Cache[k] = Math.log10(k);
    }

    public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -137,41 +153,62 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                //YMatrix[j][k] = num/den;
                logYMatrix[j][k] = logNum - logDenominator;
            }
-      //      int kstart = k-1;
-     //       for (k=kstart; k <= 2*j; k++)
-       //         logYMatrix[j][k] = logYMatrix[j][kstart];
-

        }


        for (int k=0; k <= numChr; k++)
            log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
+
    }


    double softMax(double[] vec) {
        // compute naively log10(10^x[0] + 10^x[1]+...)
-        //return Math.log10(MathUtils.sumLog10(vec));
-
-        //int maxInd = MathUtils.maxElementIndex(vec);
-        //return vec[maxInd];
-
-        // still more efficient: inline search for max. Hard-code fact that vec has length 3
-        if (vec[0] > vec[1])
-            if (vec[2] > vec[0])
-                return vec[2];
-            else
-                return vec[0];
-        else
-            if (vec[2]>vec[1])
-                return vec[2];
-            else
-                return vec[1];
-
-
+	//        return Math.log10(MathUtils.sumLog10(vec));

+	// better approximation: do Jacobian logarithm function on data pairs
+	double a = softMaxPair(vec[0],vec[1]);
+        return softMaxPair(a,vec[2]);
    }
+
+    double softMaxPair(double x, double y) {
+        if (Double.isInfinite(x))
+            return y;
+
+        if (Double.isInfinite(y))
+            return x;
+
+	if (y >= x + MAX_JACOBIAN_TOLERANCE)
+	    return y;
+	if (x >= y + MAX_JACOBIAN_TOLERANCE)
+	    return x;
+	
+	// OK, so |y-x| < tol: we use the following identity then:
+	// we need to compute log10(10^x + 10^y)
+	// By Jacobian logarithm identity, this is equal to
+	// max(x,y) + log10(1+10^-abs(x-y))
+	// we compute the second term as a table lookup
+	// with integer quantization
+	double diff = Math.abs(x-y);
+	double t1 =x;
+	if (y > x)
+	    t1 = y;
+	// t has max(x,y)
+	// we have pre-stored correction for 0,0.1,0.2,... 10.0
+	int ind = (int)Math.round(diff/JACOBIAN_LOG_TABLE_STEP);
+	double t2 = jacobianLogTable[ind];
+
+	// gdebug+
+	//double z =Math.log10(1+Math.pow(10.0,-diff));
+	//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
+	//gdebug-
+	return t1+t2;
+	// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
+    }
+
+
+
    /**
     * Can be overridden by concrete subclasses
     * @param contexts             alignment contexts