Hooking up the Bayesian estimate code for calculating Qemp in BQSR; various fixes after adding unit tests.

This commit is contained in:
Eric Banks 2013-01-04 14:43:11 -05:00
parent c7039a9b71
commit dd7f5e2be7
9 changed files with 199 additions and 95 deletions

View File

@ -53,21 +53,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "2f250fecb930e0dfe0f63fe0fed3960b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "26c8d7226139a040557b1d3b1c8792f0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "9b43a1839cb6ea03aec1d96f15ca8efb")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "3159a9d136c45e4a65d46a23dc8fd3b5")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "bb7262829effbbdbc8d88dd36f480368")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "fbb002fa2b9197c4b555852dccc11562")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "7392acb71131a60a527ca32715fc59be")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "49d4383896a90795d94138db1410a7df")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "427448eff98cf194cc7217c0b1401e79")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "50cd1a10b6ecb3d09f90f1e4a66da95d")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "1dc71561c9d0fb56f9876cb5043c5376")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "13e8f032e76340b114847c90af0a1f8a")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "03f58ae4f9d203034e895a3636fc108f")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "49d4383896a90795d94138db1410a7df")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2db2ef8c2d63e167663d70340182f49a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "6b3f252718f59cf9fd3f7612f73a35bf")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "863576ac9ff0b0e02f2e84aef15923a7")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "03e28f48201a35c70d1cf48e9f45364f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "6e3c5635d387a1c428a7c9c88ad26488")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "6507adcb94bacde4cdee9caa9f14f24b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "399bbb4bf80764dfc644b2f95d824615")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "34d70899253c2b3343ca9ae944291c30")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "e61fa47bfc08433f0cd55558e2081548")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "5c2622c63225b8b04990baf0ae4de07c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "ee7191d83d7d5bb957dc4595883c32f1")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "da92f4730356f479c2c2b71497cfac6d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "8075595113b48c0c7ead08ce41bef9fe")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "be05834841c5690c66910270521d5c32")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "e61fa47bfc08433f0cd55558e2081548")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "8ee0b498dbbc95ce76393a0f089fec92")},
};
}
@ -104,7 +104,7 @@ public class BQSRIntegrationTest extends WalkerTest {
" -sortAllCols" +
" --plot_pdf_file /dev/null" +
" --intermediate_csv_file %s",
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
Arrays.asList("dd6e0e1e3f53f8ae0c8f5de21ded6ee9"));
executeTest("testBQSR-CSVfile", spec);
}

View File

@ -79,7 +79,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
return new RecalDatum(1L, isError, reportedQual);
}
/**
@ -133,12 +133,12 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1.0, isError);
table.get(keys).increment(1, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1.0, isError);
existingDatum.increment(1, isError);
}
}
}

View File

@ -68,7 +68,7 @@ public class RecalDatum {
/**
* number of bases seen in total
*/
private double numObservations;
private long numObservations;
/**
* number of bases seen that didn't match the reference
@ -89,13 +89,13 @@ public class RecalDatum {
/**
* Create a new RecalDatum with given observation and mismatch counts, and a reported quality
*
* @param _numObservations
* @param _numMismatches
* @param reportedQuality
* @param _numObservations observations
* @param _numMismatches mismatches
* @param reportedQuality Qreported
*/
public RecalDatum(final double _numObservations, final double _numMismatches, final byte reportedQuality) {
public RecalDatum(final long _numObservations, final double _numMismatches, final byte reportedQuality) {
if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
if ( _numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( _numMismatches < 0.0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
numObservations = _numObservations;
@ -106,7 +106,7 @@ public class RecalDatum {
/**
* Copy copy into this recal datum, overwriting all of this objects data
* @param copy
* @param copy RecalDatum to copy
*/
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.getNumObservations();
@ -119,7 +119,7 @@ public class RecalDatum {
* Add in all of the data from other into this object, updating the reported quality from the expected
* error rate implied by the two reported qualities
*
* @param other
* @param other RecalDatum to combine
*/
public synchronized void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
@ -192,7 +192,7 @@ public class RecalDatum {
@Override
public String toString() {
return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
return String.format("%d,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
}
public String stringForCSV() {
@ -205,11 +205,11 @@ public class RecalDatum {
//
//---------------------------------------------------------------------------------------------------------------
public final double getNumObservations() {
public final long getNumObservations() {
return numObservations;
}
public final synchronized void setNumObservations(final double numObservations) {
public final synchronized void setNumObservations(final long numObservations) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
this.numObservations = numObservations;
empiricalQuality = UNINITIALIZED;
@ -227,7 +227,7 @@ public class RecalDatum {
}
@Requires({"by >= 0"})
public final synchronized void incrementNumObservations(final double by) {
public final synchronized void incrementNumObservations(final long by) {
numObservations += by;
empiricalQuality = UNINITIALIZED;
}
@ -240,7 +240,7 @@ public class RecalDatum {
@Requires({"incObservations >= 0", "incMismatches >= 0"})
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
public final synchronized void increment(final double incObservations, final double incMismatches) {
public final synchronized void increment(final long incObservations, final double incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
empiricalQuality = UNINITIALIZED;
@ -248,7 +248,7 @@ public class RecalDatum {
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
public final synchronized void increment(final boolean isError) {
increment(1, isError ? 1 : 0.0);
increment(1, isError ? 1.0 : 0.0);
}
// -------------------------------------------------------------------------------------
@ -257,19 +257,6 @@ public class RecalDatum {
//
// -------------------------------------------------------------------------------------
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized void calcEmpiricalQuality() {
// TODO -- add code for Bayesian estimate of Qemp here
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
/**
* calculate the expected number of errors given the estimated Q reported and the number of observations
* in this datum.
@ -281,10 +268,29 @@ public class RecalDatum {
return getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
}
static final boolean DEBUG = false;
static final double RESOLUTION_BINS_PER_QUAL = 1.0;
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized void calcEmpiricalQuality() {
static public double bayesianEstimateOfEmpiricalQuality(final double nObservations, final double nErrors, final double QReported) {
// smoothing is one error and one non-error observation
final long mismatches = (long)(getNumMismatches() + 0.5) + SMOOTHING_CONSTANT;
final long observations = getNumObservations() + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT;
final double empiricalQual = RecalDatum.bayesianEstimateOfEmpiricalQuality(observations, mismatches, getEstimatedQReported());
// This is the old and busted point estimate approach:
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
//static final boolean DEBUG = false;
static private final double RESOLUTION_BINS_PER_QUAL = 1.0;
static public double bayesianEstimateOfEmpiricalQuality(final long nObservations, final long nErrors, final double QReported) {
final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL;
@ -294,14 +300,14 @@ public class RecalDatum {
final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL;
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10Likelihood(QEmpOfBin, nObservations, nErrors);
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10QempLikelihood(QEmpOfBin, nObservations, nErrors);
if ( DEBUG )
System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
//if ( DEBUG )
// System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
}
if ( DEBUG )
System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
//if ( DEBUG )
// System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
final double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10Posteriors);
final int MLEbin = MathUtils.maxElementIndex(normalizedPosteriors);
@ -310,35 +316,47 @@ public class RecalDatum {
return Qemp;
}
static final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
static {
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
final double GF_a = 0.0;
final double GF_b = 0.9;
final double GF_c = 0.0;
final double GF_d = 0.5;
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ )
log10QempPriorCache[i] = Math.log10(gaussian.value((double) i));
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
double log10Prior = Math.log10(gaussian.value((double) i));
if ( Double.isInfinite(log10Prior) )
log10Prior = -Double.MAX_VALUE;
log10QempPriorCache[i] = log10Prior;
}
}
static public double log10QempPrior(final double Qempirical, final double Qreported) {
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
//if ( DEBUG )
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
return log10QempPriorCache[difference];
}
static public double log10Likelihood(final double Qempirical, final double nObservations, final double nErrors) {
static protected double log10QempLikelihood(final double Qempirical, final long nObservations, final long nErrors) {
if ( nObservations == 0 )
return 0.0;
// this is just a straight binomial PDF
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
if ( log10Prob == Double.NEGATIVE_INFINITY )
double log10Prob = MathUtils.log10BinomialProbability(longToInt(nObservations), longToInt(nErrors), QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
log10Prob = -Double.MAX_VALUE;
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
//if ( DEBUG )
// System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
return log10Prob;
}
static protected int longToInt(final long l) {
return (l > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int)l;
}
}

View File

@ -264,7 +264,7 @@ public class RecalDatumNode<T extends RecalDatum> {
for ( final RecalDatumNode<T> subnode : subnodes ) {
// use the yates correction to help avoid all zeros => NaN
counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L;
counts[i][1] = Math.round(subnode.getRecalDatum().getNumObservations()) + 2L;
counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L;
i++;
}
@ -320,7 +320,7 @@ public class RecalDatumNode<T extends RecalDatum> {
if ( isLeaf() ) {
// this is a leaf node
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * (double)recalDatum.getNumObservations();
// TODO -- how we can generalize this calculation?
// if ( this.qEnd <= minInterestingQual )
// // It's free to merge up quality scores below the smallest interesting one

View File

@ -93,7 +93,7 @@ public class RecalUtils {
private static final Pair<String, String> eventType = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%.2f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%.2f");
/**

View File

@ -68,15 +68,6 @@ public class RecalibrationReport {
}
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
this.quantizationInfo = quantizationInfo;
this.recalibrationTables = recalibrationTables;
this.requestedCovariates = requestedCovariates;
this.argumentTable = argumentTable;
this.RAC = RAC;
this.optionalCovariateIndexes = null;
}
/**
* Counts the number of unique read groups in the table
*
@ -192,11 +183,22 @@ public class RecalibrationReport {
else if ( o instanceof Long )
return (Long)o;
else
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but its not either: " + o.getClass());
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but it's not either: " + o.getClass());
}
private long asLong(final Object o) {
if ( o instanceof Long )
return (Long)o;
else if ( o instanceof Integer )
return ((Integer)o).longValue();
else if ( o instanceof Double )
return ((Double)o).longValue();
else
throw new ReviewedStingException("Object " + o + " is expected to be a long but it's not: " + o.getClass());
}
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
final double nObservations = asDouble(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
final long nObservations = asLong(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
final double nErrors = asDouble(reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME));
final double empiricalQuality = asDouble(reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME));

View File

@ -30,16 +30,13 @@ package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class RecalDatumUnitTest extends BaseTest {
@ -74,7 +71,7 @@ public class RecalDatumUnitTest extends BaseTest {
}
public RecalDatum makeRecalDatum() {
return new RecalDatum(exTotal, exError, (byte)getReportedQual());
return new RecalDatum((long)exTotal, (double)exError, (byte)getReportedQual());
}
@Override
@ -83,13 +80,19 @@ public class RecalDatumUnitTest extends BaseTest {
}
}
private static boolean createdDatumTestProviders = false;
@DataProvider(name = "RecalDatumTestProvider")
public Object[][] makeRecalDatumTestProvider() {
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
for ( int reportedQual : Arrays.asList(10, 20) )
if ( E <= N )
new RecalDatumTestProvider(E, N, reportedQual);
if ( !createdDatumTestProviders ) {
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
for ( int reportedQual : Arrays.asList(10, 20) )
if ( E <= N )
new RecalDatumTestProvider(E, N, reportedQual);
createdDatumTestProviders = true;
}
return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class);
}
@ -104,7 +107,6 @@ public class RecalDatumUnitTest extends BaseTest {
Assert.assertEquals(datum.getNumObservations(), cfg.exTotal, 1E-6);
if ( cfg.getReportedQual() != -1 )
Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual());
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled());
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate());
final double e = datum.getEmpiricalQuality();
@ -175,7 +177,87 @@ public class RecalDatumUnitTest extends BaseTest {
@Test
public void testNoObs() {
final RecalDatum rd = new RecalDatum(0, 0, (byte)10);
final RecalDatum rd = new RecalDatum(0L, 0.0, (byte)10);
Assert.assertEquals(rd.getEmpiricalErrorRate(), 0.0);
}
@Test
public void testlog10QempPrior() {
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
Assert.assertTrue(log10prior < 0.0);
Assert.assertFalse(Double.isInfinite(log10prior));
Assert.assertFalse(Double.isNaN(log10prior));
}
}
final int Qrep = 20;
int maxQemp = -1;
double maxQempValue = -Double.MAX_VALUE;
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
if ( log10prior > maxQempValue ) {
maxQemp = Qemp;
maxQempValue = log10prior;
}
}
Assert.assertEquals(maxQemp, Qrep);
}
@Test
public void testlog10QempLikelihood() {
final int Qrep = 20;
// test no shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(0, 0, Qrep), (double)Qrep);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 0, Qrep), (double)Qrep);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 10, Qrep), (double)Qrep);
// test small shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 10, Qrep), Qrep - 1.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 0, Qrep), Qrep + 1.0);
// test medium shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 0, Qrep), Qrep + 3.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 10, Qrep), Qrep + 3.0);
// test large shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(100000, 10, Qrep), Qrep + 8.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000000, 10, Qrep), Qrep + 16.0);
}
@Test
public void testBayesianEstimateOfEmpiricalQuality() {
final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
final int[] observations = new int[] {0, 10, 1000, 1000000};
final int[] errors = new int[] {0, 10, 1000, 1000000};
for ( double Qemp : Qemps ) {
for ( int observation : observations ) {
for ( int error : errors ) {
if ( error > observation )
continue;
final double log10likelihood = RecalDatum.log10QempLikelihood(Qemp, observation, error);
Assert.assertTrue(observation == 0 ? MathUtils.compareDoubles(log10likelihood, 0.0) == 0 : log10likelihood < 0.0);
Assert.assertFalse(Double.isInfinite(log10likelihood));
Assert.assertFalse(Double.isNaN(log10likelihood));
}
}
}
}
@Test
public void testLongToInt() {
long l = new Long((long)Integer.MAX_VALUE);
int i = RecalDatum.longToInt(l);
Assert.assertEquals(i, Integer.MAX_VALUE);
l++;
i = RecalDatum.longToInt(l);
Assert.assertEquals(i, Integer.MAX_VALUE);
}
}

View File

@ -146,7 +146,7 @@ public final class RecalUtilsUnitTest extends BaseTest {
public NestedIntegerArray<RecalDatum> makeTable(final List<Row> rows) {
final NestedIntegerArray<RecalDatum> x = new NestedIntegerArray<RecalDatum>(3, 3);
for ( final Row r : rows )
x.put(new RecalDatum(r.no, r.ne, (byte)10), r.rg, r.qual);
x.put(new RecalDatum((long)r.no, (double)r.ne, (byte)10), r.rg, r.qual);
return x;
}
}

View File

@ -20,9 +20,11 @@ public class RecalibrationReportUnitTest {
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
int nErrors = random.nextInt(maxErrors);
while ( nErrors > nObservations )
nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
}
@Test(enabled = true)