Fixed precision problem in the Bayesian calculation of Qemp: we need to cap below max integer because the MathUtils code add +1.
Added unit tests for handling large number of observations.
This commit is contained in:
parent
04e3978b04
commit
b4e7b3d691
|
|
@ -90,6 +90,10 @@ public class QualityUtils {
|
||||||
return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public double qualToErrorProbLog10(final double qual) {
|
||||||
|
return qual/-10.0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -341,6 +341,8 @@ public class RecalDatum {
|
||||||
return log10QempPriorCache[difference];
|
return log10QempPriorCache[difference];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static private final long MAX_NUMBER_OF_OBSERVATIONS = Integer.MAX_VALUE - 1;
|
||||||
|
|
||||||
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
||||||
if ( nObservations == 0 )
|
if ( nObservations == 0 )
|
||||||
return 0.0;
|
return 0.0;
|
||||||
|
|
@ -348,15 +350,15 @@ public class RecalDatum {
|
||||||
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
||||||
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
||||||
// before casting down to an int.
|
// before casting down to an int.
|
||||||
if ( nObservations > Integer.MAX_VALUE ) {
|
if ( nObservations > MAX_NUMBER_OF_OBSERVATIONS ) {
|
||||||
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
||||||
final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
|
final double fraction = (double)MAX_NUMBER_OF_OBSERVATIONS / (double)nObservations;
|
||||||
nErrors = Math.round((double)nErrors * fraction);
|
nErrors = Math.round((double)nErrors * fraction);
|
||||||
nObservations = Integer.MAX_VALUE;
|
nObservations = MAX_NUMBER_OF_OBSERVATIONS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is just a straight binomial PDF
|
// this is just a straight binomial PDF
|
||||||
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
|
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10(Qempirical));
|
||||||
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
||||||
log10Prob = -Double.MAX_VALUE;
|
log10Prob = -Double.MAX_VALUE;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,25 @@ public class BQSRGathererUnitTest extends BaseTest {
|
||||||
|
|
||||||
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table");
|
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table");
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testManyObservations() {
|
||||||
|
File recal = new File(privateTestDir + "bqsr.manyObservations.piece.table");
|
||||||
|
|
||||||
|
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||||
|
|
||||||
|
List<File> recalFiles = new LinkedList<File> ();
|
||||||
|
for ( int i=0; i < 5; i++ )
|
||||||
|
recalFiles.add(recal);
|
||||||
|
|
||||||
|
BQSRGatherer gatherer = new BQSRGatherer();
|
||||||
|
gatherer.gather(recalFiles, output);
|
||||||
|
|
||||||
|
GATKReport originalReport = new GATKReport(new File(privateTestDir + "bqsr.manyObservations.full.table"));
|
||||||
|
GATKReport calculatedReport = new GATKReport(output);
|
||||||
|
|
||||||
|
testReports(originalReport, calculatedReport);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testGatherBQSR() {
|
public void testGatherBQSR() {
|
||||||
BQSRGatherer gatherer = new BQSRGatherer();
|
BQSRGatherer gatherer = new BQSRGatherer();
|
||||||
|
|
@ -42,6 +61,11 @@ public class BQSRGathererUnitTest extends BaseTest {
|
||||||
GATKReport originalReport = new GATKReport(recal_original);
|
GATKReport originalReport = new GATKReport(recal_original);
|
||||||
GATKReport calculatedReport = new GATKReport(output);
|
GATKReport calculatedReport = new GATKReport(output);
|
||||||
|
|
||||||
|
testReports(originalReport, calculatedReport);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testReports(final GATKReport originalReport, final GATKReport calculatedReport) {
|
||||||
|
|
||||||
// test the Arguments table
|
// test the Arguments table
|
||||||
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue