Initial cleanup of RecalDatum for move and further refactoring

-- Moved Datum, the now unnecessary superclass, into RecalDatum
-- Fixed some obviously dangerous synchronization errors in RecalDatum, though these may never have caused problems in practice because the affected methods may not have been called in parallel mode
This commit is contained in:
Mark DePristo 2012-07-27 09:39:16 -04:00
parent 0670316288
commit 191294eedc
2 changed files with 54 additions and 127 deletions

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 6, 2010
*
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class Datum {
    /** Number of bases seen in total. */
    long numObservations;

    /** Number of bases seen that didn't match the reference. */
    long numMismatches;

    /** Used when calculating empirical qualities to avoid division by zero. */
    private static final int SMOOTHING_CONSTANT = 1;

    //---------------------------------------------------------------------------------------------------------------
    //
    // constructors
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Creates an empty datum with zero observations and zero mismatches.
     */
    public Datum() {
        numObservations = 0L;
        numMismatches = 0L;
    }

    /**
     * Creates a datum pre-populated with the given counts.
     *
     * @param numObservations initial number of observed bases
     * @param numMismatches   initial number of mismatching bases
     */
    public Datum(long numObservations, long numMismatches) {
        this.numObservations = numObservations;
        this.numMismatches = numMismatches;
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // increment methods
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Adds the given counts to this datum while holding this object's monitor.
     *
     * @param incObservations amount added to {@code numObservations}
     * @param incMismatches   amount added to {@code numMismatches}
     */
    synchronized void increment(final long incObservations, final long incMismatches) {
        numObservations += incObservations;
        numMismatches += incMismatches;
    }

    /**
     * Records a single observation while holding this object's monitor.
     *
     * @param isError when true the observation is also counted as a mismatch
     */
    synchronized void increment(final boolean isError) {
        numObservations++;
        numMismatches += isError ? 1:0;
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // methods to derive empirical quality score
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Computes the smoothed empirical quality as {@code -10 * log10(mismatches / observations)},
     * limited from above by {@code QualityUtils.MAX_RECALIBRATED_Q_SCORE}.
     *
     * @return the smoothed empirical quality as a double
     */
    double empiricalQualDouble() {
        final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
        // smoothing is one error and one non-error observation, for example
        final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
        final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
        return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
    }

    /**
     * Computes the unsmoothed empirical quality by passing {@code 1 - mismatches / observations}
     * to {@code QualityUtils.probToQual}.
     *
     * NOTE(review): no smoothing is applied here, so with zero observations the ratio is 0.0 / 0.0 (NaN);
     * the resulting byte then depends on how probToQual treats NaN -- confirm.
     *
     * @return the value returned by {@code QualityUtils.probToQual}
     */
    byte empiricalQualByte() {
        final double doubleMismatches = (double) (numMismatches);
        final double doubleObservations = (double) (numObservations);
        // Original author's note: "This is capped at Q40" (the cap lives in probToQual, not visible here)
        return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations);
    }

    /**
     * @return the observation count, the mismatch count and the byte empirical quality, comma separated
     */
    @Override
    public String toString() {
        return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
    }

    /**
     * Two datums are equal when both their observation and mismatch counts match.
     *
     * @param o the object to compare against
     * @return true when {@code o} is a Datum holding the same two counts
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Datum)) {
            return false;
        }
        final Datum other = (Datum) o;
        return numMismatches == other.numMismatches && numObservations == other.numObservations;
    }

    /**
     * Hash code derived from exactly the two fields compared by {@link #equals(Object)}, so that
     * equal datums always hash alike. The counts are mutable, so the hash of an instance changes
     * whenever it is incremented.
     *
     * @return a hash of the observation and mismatch counts
     */
    @Override
    public int hashCode() {
        // Fold each long into an int the same way java.lang.Long#hashCode() does.
        final int observationsHash = (int) (numObservations ^ (numObservations >>> 32));
        final int mismatchesHash = (int) (numMismatches ^ (numMismatches >>> 32));
        return 31 * observationsHash + mismatchesHash;
    }
}

View File

@ -33,20 +33,40 @@ import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
* of reference mismatches seen for that combination of covariates.
*
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends Datum {
public class RecalDatum {
private static final double UNINITIALIZED = -1.0;
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
/**
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
*/
private double estimatedQReported;
/**
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
*/
private double empiricalQuality;
/**
* number of bases seen in total
*/
long numObservations;
/**
* number of bases seen that didn't match the reference
*/
long numMismatches;
/**
* used when calculating empirical qualities to avoid division by zero
*/
private static final int SMOOTHING_CONSTANT = 1;
//---------------------------------------------------------------------------------------------------------------
//
@ -68,26 +88,24 @@ public class RecalDatum extends Datum {
this.empiricalQuality = copy.empiricalQuality;
}
public void combine(final RecalDatum other) {
public synchronized void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.numObservations, other.numMismatches);
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
empiricalQuality = UNINITIALIZED;
}
@Override
public void increment(final boolean isError) {
super.increment(isError);
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
protected final void calcEmpiricalQuality() {
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
private synchronized final void calcEmpiricalQuality() {
// cache the value so we don't call log over and over again
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
public void setEstimatedQReported(final double estimatedQReported) {
public synchronized void setEstimatedQReported(final double estimatedQReported) {
this.estimatedQReported = estimatedQReported;
}
@ -95,7 +113,7 @@ public class RecalDatum extends Datum {
return estimatedQReported;
}
public void setEmpiricalQuality(final double empiricalQuality) {
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
this.empiricalQuality = empiricalQuality;
}
@ -145,4 +163,22 @@ public class RecalDatum extends Datum {
return super.equals(o) &&
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Adds a batch of counts to this datum and then discards the cached empirical quality,
 * all while holding this object's monitor.
 *
 * @param incObservations amount added to numObservations
 * @param incMismatches   amount added to numMismatches
 */
synchronized void increment(final long incObservations, final long incMismatches) {
    this.numObservations = this.numObservations + incObservations;
    this.numMismatches = this.numMismatches + incMismatches;
    // The counts changed, so any previously cached empirical quality is stale.
    this.empiricalQuality = UNINITIALIZED;
}
/**
 * Records one observation, counting it as a mismatch when flagged, and then discards
 * the cached empirical quality, all while holding this object's monitor.
 *
 * @param isError true when the observed base should also be counted as a mismatch
 */
synchronized void increment(final boolean isError) {
    numObservations += 1;
    if (isError) {
        numMismatches += 1;
    }
    // The counts changed, so any previously cached empirical quality is stale.
    empiricalQuality = UNINITIALIZED;
}
}