Handle long->int precision in Bayesian estimate

2013-01-06 12:26:32 -05:00 · 2013-01-06 12:26:32 -05:00 · bf25e151ff
parent b73d72fe94
commit bf25e151ff
2 changed files with 20 additions and 18 deletions
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
@ -341,12 +341,22 @@ public class RecalDatum {
        return log10QempPriorCache[difference];
    }

-    static protected double log10QempLikelihood(final double Qempirical, final long nObservations, final long nErrors) {
+    static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
        if ( nObservations == 0 )
            return 0.0;

+        // the binomial code requires ints as input (because it does caching).  This should theoretically be fine because
+        // there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
+        // before casting down to an int.
+        if ( nObservations > Integer.MAX_VALUE ) {
+            // we need to decrease nErrors by the same fraction that we are decreasing nObservations
+            final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
+            nErrors = Math.round((double)nErrors * fraction);
+            nObservations = Integer.MAX_VALUE;
+        }
+
        // this is just a straight binomial PDF
-        double log10Prob = MathUtils.log10BinomialProbability(longToInt(nObservations), longToInt(nErrors), QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
+        double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
        if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
            log10Prob = -Double.MAX_VALUE;

@ -355,8 +365,4 @@ public class RecalDatum {

        return log10Prob;
    }
-
-    static protected int longToInt(final long l) {
-        return (l > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int)l;
-    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
@ -206,7 +206,7 @@ public class RecalDatumUnitTest extends BaseTest {
    }

    @Test
-    public void testlog10QempLikelihood() {
+    public void testBayesianEstimateOfEmpiricalQuality() {

        final int Qrep = 20;

@ -229,7 +229,7 @@ public class RecalDatumUnitTest extends BaseTest {
    }

    @Test
-    public void testBayesianEstimateOfEmpiricalQuality() {
+    public void testlog10QempLikelihood() {

        final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
        final int[] observations = new int[] {0, 10, 1000, 1000000};
@ -248,16 +248,12 @@ public class RecalDatumUnitTest extends BaseTest {
                }
            }
        }
-    }

-    @Test
-    public void testLongToInt() {
-        long l = new Long((long)Integer.MAX_VALUE);
-        int i = RecalDatum.longToInt(l);
-        Assert.assertEquals(i, Integer.MAX_VALUE);
-
-        l++;
-        i = RecalDatum.longToInt(l);
-        Assert.assertEquals(i, Integer.MAX_VALUE);
+        long bigNum = new Long((long)Integer.MAX_VALUE);
+        bigNum *= 2L;
+        final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000);
+        Assert.assertTrue(log10likelihood < 0.0);
+        Assert.assertFalse(Double.isInfinite(log10likelihood));
+        Assert.assertFalse(Double.isNaN(log10likelihood));
    }
 }