Handle long->int precision in Bayesian estimate
This commit is contained in:
parent
b73d72fe94
commit
bf25e151ff
|
|
@ -341,12 +341,22 @@ public class RecalDatum {
|
||||||
return log10QempPriorCache[difference];
|
return log10QempPriorCache[difference];
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected double log10QempLikelihood(final double Qempirical, final long nObservations, final long nErrors) {
|
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
||||||
if ( nObservations == 0 )
|
if ( nObservations == 0 )
|
||||||
return 0.0;
|
return 0.0;
|
||||||
|
|
||||||
|
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
||||||
|
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
||||||
|
// before casting down to an int.
|
||||||
|
if ( nObservations > Integer.MAX_VALUE ) {
|
||||||
|
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
||||||
|
final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
|
||||||
|
nErrors = Math.round((double)nErrors * fraction);
|
||||||
|
nObservations = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
// this is just a straight binomial PDF
|
// this is just a straight binomial PDF
|
||||||
double log10Prob = MathUtils.log10BinomialProbability(longToInt(nObservations), longToInt(nErrors), QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
|
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
|
||||||
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
||||||
log10Prob = -Double.MAX_VALUE;
|
log10Prob = -Double.MAX_VALUE;
|
||||||
|
|
||||||
|
|
@ -355,8 +365,4 @@ public class RecalDatum {
|
||||||
|
|
||||||
return log10Prob;
|
return log10Prob;
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected int longToInt(final long l) {
|
|
||||||
return (l > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int)l;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -206,7 +206,7 @@ public class RecalDatumUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testlog10QempLikelihood() {
|
public void testBayesianEstimateOfEmpiricalQuality() {
|
||||||
|
|
||||||
final int Qrep = 20;
|
final int Qrep = 20;
|
||||||
|
|
||||||
|
|
@ -229,7 +229,7 @@ public class RecalDatumUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBayesianEstimateOfEmpiricalQuality() {
|
public void testlog10QempLikelihood() {
|
||||||
|
|
||||||
final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
|
final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
|
||||||
final int[] observations = new int[] {0, 10, 1000, 1000000};
|
final int[] observations = new int[] {0, 10, 1000, 1000000};
|
||||||
|
|
@ -248,16 +248,12 @@ public class RecalDatumUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
long bigNum = new Long((long)Integer.MAX_VALUE);
|
||||||
public void testLongToInt() {
|
bigNum *= 2L;
|
||||||
long l = new Long((long)Integer.MAX_VALUE);
|
final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000);
|
||||||
int i = RecalDatum.longToInt(l);
|
Assert.assertTrue(log10likelihood < 0.0);
|
||||||
Assert.assertEquals(i, Integer.MAX_VALUE);
|
Assert.assertFalse(Double.isInfinite(log10likelihood));
|
||||||
|
Assert.assertFalse(Double.isNaN(log10likelihood));
|
||||||
l++;
|
|
||||||
i = RecalDatum.longToInt(l);
|
|
||||||
Assert.assertEquals(i, Integer.MAX_VALUE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue