diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 4eb069542..eebc86b6b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -341,12 +341,22 @@ public class RecalDatum { return log10QempPriorCache[difference]; } - static protected double log10QempLikelihood(final double Qempirical, final long nObservations, final long nErrors) { + static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) { if ( nObservations == 0 ) return 0.0; + // the binomial code requires ints as input (because it does caching). This should theoretically be fine because + // there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow + // before casting down to an int. + if ( nObservations > Integer.MAX_VALUE ) { + // we need to decrease nErrors by the same fraction that we are decreasing nObservations + final double fraction = (double)Integer.MAX_VALUE / (double)nObservations; + nErrors = Math.round((double)nErrors * fraction); + nObservations = Integer.MAX_VALUE; + } + // this is just a straight binomial PDF - double log10Prob = MathUtils.log10BinomialProbability(longToInt(nObservations), longToInt(nErrors), QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical)); + double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical)); if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) ) log10Prob = -Double.MAX_VALUE; @@ -355,8 +365,4 @@ public class RecalDatum { return log10Prob; } - - static protected int longToInt(final long l) { - return (l > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int)l; - } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java index 2305fe566..9b2938d80 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -206,7 +206,7 @@ public class RecalDatumUnitTest extends BaseTest { } @Test - public void testlog10QempLikelihood() { + public void testBayesianEstimateOfEmpiricalQuality() { final int Qrep = 20; @@ -229,7 +229,7 @@ public class RecalDatumUnitTest extends BaseTest { } @Test - public void testBayesianEstimateOfEmpiricalQuality() { + public void testlog10QempLikelihood() { final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 }; final int[] observations = new int[] {0, 10, 1000, 1000000}; @@ -248,16 +248,12 @@ public class RecalDatumUnitTest extends BaseTest { } } } - } - @Test - public void testLongToInt() { - long l = new Long((long)Integer.MAX_VALUE); - int i = RecalDatum.longToInt(l); - Assert.assertEquals(i, Integer.MAX_VALUE); - - l++; - i = RecalDatum.longToInt(l); - Assert.assertEquals(i, Integer.MAX_VALUE); + long bigNum = new Long((long)Integer.MAX_VALUE); + bigNum *= 2L; + final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000); + Assert.assertTrue(log10likelihood < 0.0); + Assert.assertFalse(Double.isInfinite(log10likelihood)); + Assert.assertFalse(Double.isNaN(log10likelihood)); } } \ No newline at end of file