Extensive unit tests, contacts, and documentation for RecalDatum

This commit is contained in:
Mark DePristo 2012-07-27 14:34:13 -04:00
parent e00ed8bc5e
commit 57b45bfb1e
3 changed files with 93 additions and 24 deletions

View File

@ -69,9 +69,14 @@ public class QualityUtils {
* @return a probability (0.0 - 1.0)
*/
static private double qualToErrorProbRaw(int qual) {
return qualToErrorProb((double) qual);
}
public static double qualToErrorProb(final double qual) {
return Math.pow(10.0, ((double) qual)/-10.0);
}
static public double qualToErrorProb(byte qual) {
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.recalibration;
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -40,6 +41,17 @@ import java.util.Random;
* User: rpoplin
* Date: Nov 3, 2009
*/
@Invariant({
"estimatedQReported >= 0.0",
"! Double.isNaN(estimatedQReported)",
"! Double.isInfinite(estimatedQReported)",
"empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
"! Double.isNaN(empiricalQuality)",
"! Double.isInfinite(empiricalQuality)",
"numObservations >= 0",
"numMismatches >= 0",
"numMismatches <= numObservations"
})
public class RecalDatum {
private static final double UNINITIALIZED = -1.0;
@ -74,13 +86,28 @@ public class RecalDatum {
//
//---------------------------------------------------------------------------------------------------------------
/**
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
*
* @param _numObservations
* @param _numMismatches
* @param reportedQuality
*/
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
/**
* Copy copy into this recal datum, overwriting all of this objects data
* @param copy
*/
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.getNumObservations();
this.numMismatches = copy.getNumMismatches();
@ -88,6 +115,12 @@ public class RecalDatum {
this.empiricalQuality = copy.empiricalQuality;
}
/**
* Add in all of the data from other into this object, updating the reported quality from the expected
* error rate implied by the two reported qualities
*
* @param other
*/
public synchronized void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.getNumObservations(), other.getNumMismatches());
@ -95,26 +128,26 @@ public class RecalDatum {
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized final void calcEmpiricalQuality() {
// cache the value so we don't call log over and over again
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
// smoothing is one error and one non-error observation, for example
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
public synchronized void setEstimatedQReported(final double estimatedQReported) {
if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
this.estimatedQReported = estimatedQReported;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final byte getEstimatedQReportedAsByte() {
return (byte)(int)(Math.round(getEstimatedQReported()));
}
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
this.empiricalQuality = empiricalQuality;
}
@ -133,14 +166,6 @@ public class RecalDatum {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
private double calcExpectedErrors() {
return (double) getNumObservations() * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
@ -176,6 +201,7 @@ public class RecalDatum {
}
public synchronized void setNumObservations(final long numObservations) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
this.numObservations = numObservations;
empiricalQuality = UNINITIALIZED;
}
@ -184,29 +210,67 @@ public class RecalDatum {
return numMismatches;
}
@Requires({"numMismatches >= 0"})
public synchronized void setNumMismatches(final long numMismatches) {
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
this.numMismatches = numMismatches;
empiricalQuality = UNINITIALIZED;
}
@Requires({"by >= 0"})
public synchronized void incrementNumObservations(final long by) {
numObservations += by;
empiricalQuality = UNINITIALIZED;
}
@Requires({"by >= 0"})
public synchronized void incrementNumMismatches(final long by) {
numMismatches += by;
empiricalQuality = UNINITIALIZED;
}
@Requires({"incObservations >= 0", "incMismatches >= 0"})
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
public synchronized void increment(final long incObservations, final long incMismatches) {
incrementNumObservations(incObservations);
incrementNumMismatches(incMismatches);
}
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
public synchronized void increment(final boolean isError) {
incrementNumObservations(1);
if ( isError )
incrementNumMismatches(1);
}
// -------------------------------------------------------------------------------------
//
// Private implementation helper functions
//
// -------------------------------------------------------------------------------------
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized final void calcEmpiricalQuality() {
// cache the value so we don't call log over and over again
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
// smoothing is one error and one non-error observation, for example
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
/**
* calculate the expected number of errors given the estimated Q reported and the number of observations
* in this datum.
*
* @return a positive (potentially fractional) estimate of the number of errors
*/
@Ensures("result >= 0.0")
private double calcExpectedErrors() {
return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
}
}

View File

@ -282,12 +282,12 @@ public abstract class BaseTest {
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
Assert.assertTrue(actual instanceof Double);
Assert.assertTrue(actual instanceof Double, "Not a double");
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
}
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
Assert.assertTrue(actual instanceof Double);
Assert.assertTrue(actual instanceof Double, "Not a double");
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
}
@ -303,13 +303,13 @@ public abstract class BaseTest {
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
Assert.assertTrue(Double.isNaN(actual));
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
Assert.assertTrue(Double.isInfinite(actual));
Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
else {
final double delta = Math.abs(actual - expected);
final double ratio = Math.abs(actual / expected - 1.0);
Assert.assertTrue(delta < tolerance || ratio < tolerance);
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
}
}
}