Fixed a precision problem in the Bayesian calculation of Qemp: we need to cap the observation count below Integer.MAX_VALUE because the MathUtils code adds +1.
Added unit tests for handling a large number of observations.
This commit is contained in:
parent
04e3978b04
commit
b4e7b3d691
|
|
@ -90,6 +90,10 @@ public class QualityUtils {
|
|||
return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
static public double qualToErrorProbLog10(final double qual) {
|
||||
return qual/-10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -341,6 +341,8 @@ public class RecalDatum {
|
|||
return log10QempPriorCache[difference];
|
||||
}
|
||||
|
||||
static private final long MAX_NUMBER_OF_OBSERVATIONS = Integer.MAX_VALUE - 1;
|
||||
|
||||
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
||||
if ( nObservations == 0 )
|
||||
return 0.0;
|
||||
|
|
@ -348,15 +350,15 @@ public class RecalDatum {
|
|||
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
||||
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
||||
// before casting down to an int.
|
||||
if ( nObservations > Integer.MAX_VALUE ) {
|
||||
if ( nObservations > MAX_NUMBER_OF_OBSERVATIONS ) {
|
||||
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
||||
final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
|
||||
final double fraction = (double)MAX_NUMBER_OF_OBSERVATIONS / (double)nObservations;
|
||||
nErrors = Math.round((double)nErrors * fraction);
|
||||
nObservations = Integer.MAX_VALUE;
|
||||
nObservations = MAX_NUMBER_OF_OBSERVATIONS;
|
||||
}
|
||||
|
||||
// this is just a straight binomial PDF
|
||||
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
|
||||
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10(Qempirical));
|
||||
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
||||
log10Prob = -Double.MAX_VALUE;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,25 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
|
||||
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table");
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testManyObservations() {
|
||||
File recal = new File(privateTestDir + "bqsr.manyObservations.piece.table");
|
||||
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
List<File> recalFiles = new LinkedList<File> ();
|
||||
for ( int i=0; i < 5; i++ )
|
||||
recalFiles.add(recal);
|
||||
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
gatherer.gather(recalFiles, output);
|
||||
|
||||
GATKReport originalReport = new GATKReport(new File(privateTestDir + "bqsr.manyObservations.full.table"));
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
testReports(originalReport, calculatedReport);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testGatherBQSR() {
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
|
|
@ -42,6 +61,11 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
GATKReport originalReport = new GATKReport(recal_original);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
testReports(originalReport, calculatedReport);
|
||||
}
|
||||
|
||||
private void testReports(final GATKReport originalReport, final GATKReport calculatedReport) {
|
||||
|
||||
// test the Arguments table
|
||||
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
|
|
|
|||
Loading…
Reference in New Issue