Initial cleanup of RecalDatum for move and further refactoring
-- Moved Datum, the now-unnecessary superclass, into RecalDatum.
-- Fixed some obviously dangerous synchronization errors in RecalDatum; these may never have caused problems in practice, because the affected methods may not have been called in parallel mode.
This commit is contained in:
parent
0670316288
commit
191294eedc
|
|
@ -1,109 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jan 6, 2010
|
||||
*
|
||||
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
|
||||
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class Datum {
|
||||
|
||||
long numObservations; // number of bases seen in total
|
||||
long numMismatches; // number of bases seen that didn't match the reference
|
||||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Datum() {
|
||||
numObservations = 0L;
|
||||
numMismatches = 0L;
|
||||
}
|
||||
|
||||
public Datum(long numObservations, long numMismatches) {
|
||||
this.numObservations = numObservations;
|
||||
this.numMismatches = numMismatches;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
}
|
||||
|
||||
synchronized void increment(final boolean isError) {
|
||||
numObservations++;
|
||||
numMismatches += isError ? 1:0;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// methods to derive empirical quality score
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
double empiricalQualDouble() {
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
|
||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
byte empiricalQualByte() {
|
||||
final double doubleMismatches = (double) (numMismatches);
|
||||
final double doubleObservations = (double) (numObservations);
|
||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof Datum))
|
||||
return false;
|
||||
Datum other = (Datum) o;
|
||||
return numMismatches == other.numMismatches && numObservations == other.numObservations;
|
||||
}
|
||||
}
|
||||
|
|
@ -33,20 +33,40 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
|
||||
* of reference mismatches seen for that combination of covariates.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class RecalDatum extends Datum {
|
||||
|
||||
public class RecalDatum {
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
/**
|
||||
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
*/
|
||||
private double estimatedQReported;
|
||||
|
||||
/**
|
||||
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
*/
|
||||
private double empiricalQuality;
|
||||
|
||||
/**
|
||||
* number of bases seen in total
|
||||
*/
|
||||
long numObservations;
|
||||
|
||||
/**
|
||||
* number of bases seen that didn't match the reference
|
||||
*/
|
||||
long numMismatches;
|
||||
|
||||
/**
|
||||
* used when calculating empirical qualities to avoid division by zero
|
||||
*/
|
||||
private static final int SMOOTHING_CONSTANT = 1;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -68,26 +88,24 @@ public class RecalDatum extends Datum {
|
|||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
public void combine(final RecalDatum other) {
|
||||
public synchronized void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
increment(other.numObservations, other.numMismatches);
|
||||
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void increment(final boolean isError) {
|
||||
super.increment(isError);
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
protected final void calcEmpiricalQuality() {
|
||||
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
||||
private synchronized final void calcEmpiricalQuality() {
|
||||
// cache the value so we don't call log over and over again
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
public void setEstimatedQReported(final double estimatedQReported) {
|
||||
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
||||
this.estimatedQReported = estimatedQReported;
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +113,7 @@ public class RecalDatum extends Datum {
|
|||
return estimatedQReported;
|
||||
}
|
||||
|
||||
public void setEmpiricalQuality(final double empiricalQuality) {
|
||||
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
||||
this.empiricalQuality = empiricalQuality;
|
||||
}
|
||||
|
||||
|
|
@ -145,4 +163,22 @@ public class RecalDatum extends Datum {
|
|||
return super.equals(o) &&
|
||||
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
synchronized void increment(final boolean isError) {
|
||||
numObservations++;
|
||||
numMismatches += isError ? 1:0;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue