From 191294eedc9ddb7033f1e3fac304ef5b2bb6eee5 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jul 2012 09:39:16 -0400 Subject: [PATCH] Initial cleanup of RecalDatum for move and further refactoring -- Moved Datum, the now unnecessary superclass, into RecalDatum -- Fixed some obviously dangerous synchronization errors in RecalDatum, though these may not have caused problems because they may not have been called in parallel mode --- .../sting/gatk/walkers/bqsr/Datum.java | 109 ------------------ .../sting/gatk/walkers/bqsr/RecalDatum.java | 72 +++++++++--- 2 files changed, 54 insertions(+), 127 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java deleted file mode 100644 index d7e8e16b5..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -import org.broadinstitute.sting.utils.QualityUtils; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 6, 2010 - * - * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed. - * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. - */ - -public class Datum { - - long numObservations; // number of bases seen in total - long numMismatches; // number of bases seen that didn't match the reference - - private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public Datum() { - numObservations = 0L; - numMismatches = 0L; - } - - public Datum(long numObservations, long numMismatches) { - this.numObservations = numObservations; - this.numMismatches = numMismatches; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // increment methods - // - //--------------------------------------------------------------------------------------------------------------- - - synchronized void increment(final long incObservations, final long incMismatches) { - numObservations += 
incObservations; - numMismatches += incMismatches; - } - - synchronized void increment(final boolean isError) { - numObservations++; - numMismatches += isError ? 1:0; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // methods to derive empirical quality score - // - //--------------------------------------------------------------------------------------------------------------- - - double empiricalQualDouble() { - final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example - final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); - } - - byte empiricalQualByte() { - final double doubleMismatches = (double) (numMismatches); - final double doubleObservations = (double) (numObservations); - return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40 - } - - @Override - public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte()); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Datum)) - return false; - Datum other = (Datum) o; - return numMismatches == other.numMismatches && numObservations == other.numObservations; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index 9b00b1876..ed4e769b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -33,20 +33,40 @@ import org.broadinstitute.sting.utils.QualityUtils; import 
java.util.Random; /** + * An individual piece of recalibration data. Each bin counts up the number of observations and the number + * of reference mismatches seen for that combination of covariates. + * * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 - * - * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. */ - -public class RecalDatum extends Datum { - +public class RecalDatum { private static final double UNINITIALIZED = -1.0; - private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations - private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + /** + * estimated reported quality score based on combined data's individual q-reporteds and number of observations + */ + private double estimatedQReported; + /** + * the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + */ + private double empiricalQuality; + + /** + * number of bases seen in total + */ + long numObservations; + + /** + * number of bases seen that didn't match the reference + */ + long numMismatches; + + /** + * used when calculating empirical qualities to avoid division by zero + */ + private static final int SMOOTHING_CONSTANT = 1; //--------------------------------------------------------------------------------------------------------------- // @@ -68,26 +88,24 @@ public class RecalDatum extends Datum { this.empiricalQuality = copy.empiricalQuality; } - public void combine(final RecalDatum other) { + public synchronized void combine(final RecalDatum other) { final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); increment(other.numObservations, other.numMismatches); estimatedQReported = -10 * 
Math.log10(sumErrors / this.numObservations); empiricalQuality = UNINITIALIZED; } - @Override - public void increment(final boolean isError) { - super.increment(isError); - empiricalQuality = UNINITIALIZED; - } - @Requires("empiricalQuality == UNINITIALIZED") @Ensures("empiricalQuality != UNINITIALIZED") - protected final void calcEmpiricalQuality() { - empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again + private synchronized final void calcEmpiricalQuality() { + // cache the value so we don't call log over and over again + final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, matching the removed Datum.empiricalQualDouble() + final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); + empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } - public void setEstimatedQReported(final double estimatedQReported) { + public synchronized void setEstimatedQReported(final double estimatedQReported) { this.estimatedQReported = estimatedQReported; } @@ -95,7 +113,7 @@ public class RecalDatum extends Datum { return estimatedQReported; } - public void setEmpiricalQuality(final double empiricalQuality) { + public synchronized void setEmpiricalQuality(final double empiricalQuality) { this.empiricalQuality = empiricalQuality; } @@ -145,4 +163,22 @@ public class RecalDatum extends Datum { return super.equals(o) && MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; } + + //--------------------------------------------------------------------------------------------------------------- + // + // increment methods + // + //--------------------------------------------------------------------------------------------------------------- + + synchronized void increment(final long incObservations, final long incMismatches) { + numObservations += incObservations; + numMismatches += 
incMismatches; + empiricalQuality = UNINITIALIZED; + } + + synchronized void increment(final boolean isError) { + numObservations++; + numMismatches += isError ? 1:0; + empiricalQuality = UNINITIALIZED; + } }