Initial cleanup of RecalDatum for move and further refactoring
-- Moved Datum, the now-unnecessary superclass, into RecalDatum
-- Fixed some obviously dangerous synchronization errors in RecalDatum, though these may not have caused problems because they may not have been called in parallel mode
This commit is contained in:
parent
0670316288
commit
191294eedc
|
|
@ -1,109 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: rpoplin
|
|
||||||
* Date: Jan 6, 2010
|
|
||||||
*
|
|
||||||
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
|
|
||||||
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class Datum {
|
|
||||||
|
|
||||||
long numObservations; // number of bases seen in total
|
|
||||||
long numMismatches; // number of bases seen that didn't match the reference
|
|
||||||
|
|
||||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// constructors
|
|
||||||
//
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
public Datum() {
|
|
||||||
numObservations = 0L;
|
|
||||||
numMismatches = 0L;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Datum(long numObservations, long numMismatches) {
|
|
||||||
this.numObservations = numObservations;
|
|
||||||
this.numMismatches = numMismatches;
|
|
||||||
}
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// increment methods
|
|
||||||
//
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
|
||||||
numObservations += incObservations;
|
|
||||||
numMismatches += incMismatches;
|
|
||||||
}
|
|
||||||
|
|
||||||
synchronized void increment(final boolean isError) {
|
|
||||||
numObservations++;
|
|
||||||
numMismatches += isError ? 1:0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// methods to derive empirical quality score
|
|
||||||
//
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
double empiricalQualDouble() {
|
|
||||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
|
||||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
|
|
||||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
|
||||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
|
||||||
}
|
|
||||||
|
|
||||||
byte empiricalQualByte() {
|
|
||||||
final double doubleMismatches = (double) (numMismatches);
|
|
||||||
final double doubleObservations = (double) (numObservations);
|
|
||||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (!(o instanceof Datum))
|
|
||||||
return false;
|
|
||||||
Datum other = (Datum) o;
|
|
||||||
return numMismatches == other.numMismatches && numObservations == other.numObservations;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -33,20 +33,40 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
|
||||||
|
* of reference mismatches seen for that combination of covariates.
|
||||||
|
*
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 3, 2009
|
* Date: Nov 3, 2009
|
||||||
*
|
|
||||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
|
||||||
*/
|
*/
|
||||||
|
public class RecalDatum {
|
||||||
public class RecalDatum extends Datum {
|
|
||||||
|
|
||||||
private static final double UNINITIALIZED = -1.0;
|
private static final double UNINITIALIZED = -1.0;
|
||||||
|
|
||||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
/**
|
||||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||||
|
*/
|
||||||
|
private double estimatedQReported;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||||
|
*/
|
||||||
|
private double empiricalQuality;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* number of bases seen in total
|
||||||
|
*/
|
||||||
|
long numObservations;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* number of bases seen that didn't match the reference
|
||||||
|
*/
|
||||||
|
long numMismatches;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* used when calculating empirical qualities to avoid division by zero
|
||||||
|
*/
|
||||||
|
private static final int SMOOTHING_CONSTANT = 1;
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -68,26 +88,24 @@ public class RecalDatum extends Datum {
|
||||||
this.empiricalQuality = copy.empiricalQuality;
|
this.empiricalQuality = copy.empiricalQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void combine(final RecalDatum other) {
|
public synchronized void combine(final RecalDatum other) {
|
||||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||||
increment(other.numObservations, other.numMismatches);
|
increment(other.numObservations, other.numMismatches);
|
||||||
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||||
empiricalQuality = UNINITIALIZED;
|
empiricalQuality = UNINITIALIZED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void increment(final boolean isError) {
|
|
||||||
super.increment(isError);
|
|
||||||
empiricalQuality = UNINITIALIZED;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Requires("empiricalQuality == UNINITIALIZED")
|
@Requires("empiricalQuality == UNINITIALIZED")
|
||||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||||
protected final void calcEmpiricalQuality() {
|
private synchronized final void calcEmpiricalQuality() {
|
||||||
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
// cache the value so we don't call log over and over again
|
||||||
|
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||||
|
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
||||||
|
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||||
|
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEstimatedQReported(final double estimatedQReported) {
|
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
||||||
this.estimatedQReported = estimatedQReported;
|
this.estimatedQReported = estimatedQReported;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -95,7 +113,7 @@ public class RecalDatum extends Datum {
|
||||||
return estimatedQReported;
|
return estimatedQReported;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEmpiricalQuality(final double empiricalQuality) {
|
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
||||||
this.empiricalQuality = empiricalQuality;
|
this.empiricalQuality = empiricalQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -145,4 +163,22 @@ public class RecalDatum extends Datum {
|
||||||
return super.equals(o) &&
|
return super.equals(o) &&
|
||||||
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// increment methods
|
||||||
|
//
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||||
|
numObservations += incObservations;
|
||||||
|
numMismatches += incMismatches;
|
||||||
|
empiricalQuality = UNINITIALIZED;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized void increment(final boolean isError) {
|
||||||
|
numObservations++;
|
||||||
|
numMismatches += isError ? 1:0;
|
||||||
|
empiricalQuality = UNINITIALIZED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue