Extensive unit tests, contracts, and documentation for RecalDatum
This commit is contained in:
parent
e00ed8bc5e
commit
57b45bfb1e
|
|
@ -69,9 +69,14 @@ public class QualityUtils {
|
||||||
* @return a probability (0.0 - 1.0)
|
* @return a probability (0.0 - 1.0)
|
||||||
*/
|
*/
|
||||||
static private double qualToErrorProbRaw(int qual) {
|
static private double qualToErrorProbRaw(int qual) {
|
||||||
|
return qualToErrorProb((double) qual);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static double qualToErrorProb(final double qual) {
|
||||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static public double qualToErrorProb(byte qual) {
|
static public double qualToErrorProb(byte qual) {
|
||||||
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Invariant;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
@ -40,6 +41,17 @@ import java.util.Random;
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 3, 2009
|
* Date: Nov 3, 2009
|
||||||
*/
|
*/
|
||||||
|
@Invariant({
|
||||||
|
"estimatedQReported >= 0.0",
|
||||||
|
"! Double.isNaN(estimatedQReported)",
|
||||||
|
"! Double.isInfinite(estimatedQReported)",
|
||||||
|
"empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
|
||||||
|
"! Double.isNaN(empiricalQuality)",
|
||||||
|
"! Double.isInfinite(empiricalQuality)",
|
||||||
|
"numObservations >= 0",
|
||||||
|
"numMismatches >= 0",
|
||||||
|
"numMismatches <= numObservations"
|
||||||
|
})
|
||||||
public class RecalDatum {
|
public class RecalDatum {
|
||||||
private static final double UNINITIALIZED = -1.0;
|
private static final double UNINITIALIZED = -1.0;
|
||||||
|
|
||||||
|
|
@ -74,13 +86,28 @@ public class RecalDatum {
|
||||||
//
|
//
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new RecalDatum with the given observation and mismatch counts, and a reported quality
|
||||||
|
*
|
||||||
|
* @param _numObservations
|
||||||
|
* @param _numMismatches
|
||||||
|
* @param reportedQuality
|
||||||
|
*/
|
||||||
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
|
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
|
||||||
|
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||||
|
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||||
|
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
|
||||||
|
|
||||||
numObservations = _numObservations;
|
numObservations = _numObservations;
|
||||||
numMismatches = _numMismatches;
|
numMismatches = _numMismatches;
|
||||||
estimatedQReported = reportedQuality;
|
estimatedQReported = reportedQuality;
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy the data from copy into this recal datum, overwriting all of this object's data
|
||||||
|
* @param copy
|
||||||
|
*/
|
||||||
public RecalDatum(final RecalDatum copy) {
|
public RecalDatum(final RecalDatum copy) {
|
||||||
this.numObservations = copy.getNumObservations();
|
this.numObservations = copy.getNumObservations();
|
||||||
this.numMismatches = copy.getNumMismatches();
|
this.numMismatches = copy.getNumMismatches();
|
||||||
|
|
@ -88,6 +115,12 @@ public class RecalDatum {
|
||||||
this.empiricalQuality = copy.empiricalQuality;
|
this.empiricalQuality = copy.empiricalQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add in all of the data from other into this object, updating the reported quality from the expected
|
||||||
|
* error rate implied by the two reported qualities
|
||||||
|
*
|
||||||
|
* @param other
|
||||||
|
*/
|
||||||
public synchronized void combine(final RecalDatum other) {
|
public synchronized void combine(final RecalDatum other) {
|
||||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||||
increment(other.getNumObservations(), other.getNumMismatches());
|
increment(other.getNumObservations(), other.getNumMismatches());
|
||||||
|
|
@ -95,26 +128,26 @@ public class RecalDatum {
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("empiricalQuality == UNINITIALIZED")
|
|
||||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
|
||||||
private synchronized final void calcEmpiricalQuality() {
|
|
||||||
// cache the value so we don't call log over and over again
|
|
||||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
|
||||||
// smoothing is one error and one non-error observation, for example
|
|
||||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
|
|
||||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
|
||||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
||||||
|
if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
|
||||||
|
if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
|
||||||
|
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
|
||||||
|
|
||||||
this.estimatedQReported = estimatedQReported;
|
this.estimatedQReported = estimatedQReported;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final double getEstimatedQReported() {
|
public final double getEstimatedQReported() {
|
||||||
return estimatedQReported;
|
return estimatedQReported;
|
||||||
}
|
}
|
||||||
|
public final byte getEstimatedQReportedAsByte() {
|
||||||
|
return (byte)(int)(Math.round(getEstimatedQReported()));
|
||||||
|
}
|
||||||
|
|
||||||
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
||||||
|
if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
|
||||||
|
if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
|
||||||
|
if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
|
||||||
|
|
||||||
this.empiricalQuality = empiricalQuality;
|
this.empiricalQuality = empiricalQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -133,14 +166,6 @@ public class RecalDatum {
|
||||||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calcExpectedErrors() {
|
|
||||||
return (double) getNumObservations() * qualToErrorProb(estimatedQReported);
|
|
||||||
}
|
|
||||||
|
|
||||||
private double qualToErrorProb(final double qual) {
|
|
||||||
return Math.pow(10.0, qual / -10.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||||
final Random random = new Random();
|
final Random random = new Random();
|
||||||
final int nObservations = random.nextInt(maxObservations);
|
final int nObservations = random.nextInt(maxObservations);
|
||||||
|
|
@ -176,6 +201,7 @@ public class RecalDatum {
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void setNumObservations(final long numObservations) {
|
public synchronized void setNumObservations(final long numObservations) {
|
||||||
|
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||||
this.numObservations = numObservations;
|
this.numObservations = numObservations;
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
@ -184,29 +210,67 @@ public class RecalDatum {
|
||||||
return numMismatches;
|
return numMismatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"numMismatches >= 0"})
|
||||||
public synchronized void setNumMismatches(final long numMismatches) {
|
public synchronized void setNumMismatches(final long numMismatches) {
|
||||||
|
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||||
this.numMismatches = numMismatches;
|
this.numMismatches = numMismatches;
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"by >= 0"})
|
||||||
public synchronized void incrementNumObservations(final long by) {
|
public synchronized void incrementNumObservations(final long by) {
|
||||||
numObservations += by;
|
numObservations += by;
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"by >= 0"})
|
||||||
public synchronized void incrementNumMismatches(final long by) {
|
public synchronized void incrementNumMismatches(final long by) {
|
||||||
numMismatches += by;
|
numMismatches += by;
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
||||||
|
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
||||||
public synchronized void increment(final long incObservations, final long incMismatches) {
|
public synchronized void increment(final long incObservations, final long incMismatches) {
|
||||||
incrementNumObservations(incObservations);
|
incrementNumObservations(incObservations);
|
||||||
incrementNumMismatches(incMismatches);
|
incrementNumMismatches(incMismatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
||||||
public synchronized void increment(final boolean isError) {
|
public synchronized void increment(final boolean isError) {
|
||||||
incrementNumObservations(1);
|
incrementNumObservations(1);
|
||||||
if ( isError )
|
if ( isError )
|
||||||
incrementNumMismatches(1);
|
incrementNumMismatches(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Private implementation helper functions
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
|
||||||
|
*/
|
||||||
|
@Requires("empiricalQuality == UNINITIALIZED")
|
||||||
|
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||||
|
private synchronized final void calcEmpiricalQuality() {
|
||||||
|
// cache the value so we don't call log over and over again
|
||||||
|
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||||
|
// smoothing is one error and one non-error observation, for example
|
||||||
|
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
|
||||||
|
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||||
|
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* calculate the expected number of errors given the estimated Q reported and the number of observations
|
||||||
|
* in this datum.
|
||||||
|
*
|
||||||
|
* @return a positive (potentially fractional) estimate of the number of errors
|
||||||
|
*/
|
||||||
|
@Ensures("result >= 0.0")
|
||||||
|
private double calcExpectedErrors() {
|
||||||
|
return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -282,12 +282,12 @@ public abstract class BaseTest {
|
||||||
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
|
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
|
||||||
|
|
||||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
|
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
|
||||||
Assert.assertTrue(actual instanceof Double);
|
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
|
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
|
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
|
||||||
Assert.assertTrue(actual instanceof Double);
|
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
|
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -303,13 +303,13 @@ public abstract class BaseTest {
|
||||||
|
|
||||||
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
||||||
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
||||||
Assert.assertTrue(Double.isNaN(actual));
|
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
|
||||||
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
|
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
|
||||||
Assert.assertTrue(Double.isInfinite(actual));
|
Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
|
||||||
else {
|
else {
|
||||||
final double delta = Math.abs(actual - expected);
|
final double delta = Math.abs(actual - expected);
|
||||||
final double ratio = Math.abs(actual / expected - 1.0);
|
final double ratio = Math.abs(actual / expected - 1.0);
|
||||||
Assert.assertTrue(delta < tolerance || ratio < tolerance);
|
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue