Cleanup and unit tests for QualityUtils

-- Fixed a few conversion bugs with edge case quals (ones that were very high) -- Fixed a critical bug in the conversion of quals that was causing near capped quals to fall below their actual value. Will undoubtedly need to fix md5s -- More precise prob -> qual calculations for very high confidence events in phredScaleCorrectRate, trueProbToQual, and errorProbToQual. Very likely to improve accuracy of many calculations in the GATK -- Added errorProbToQual and trueProbToQual calculations that accept an integer cap, and perform the (tricky) conversion from int to byte correctly. -- Full docs and unit tests for phredScaleCorrectRate and phredScaleErrorRate. -- Renamed probToQual to trueProbToQual -- Added goodProbability and log10OneMinusX to MathUtils -- Went through the GATK and cleaned up many uses of QualityUtils -- Cleanup constants in QualityUtils -- Added full docs for all of the constants -- Rename MAX_QUAL_SCORE to MAX_SAM_QUAL_SCORE for clarity -- Moved MAX_GATK_USABLE_Q_SCORE to RecalDatum, as it's s BQSR specific feature -- Convert uses of QualityUtils.errorProbToQual(1-x) to QualityUtils.trueProbToQual(x) -- Cleanup duplicate quality score routines in MathUtils. Moved and renamed MathUtils.log10ProbabilityToPhredScale => QualityUtils.phredScaleLog10ErrorRate. Removed 3 routines from MathUtils, and remapped their usages into the better routines in QualityUtils
2013-02-09 13:04:30 -05:00 · 2013-02-09 13:04:30 -05:00 · 9e28d1e347
parent 307f709cc7
commit 9e28d1e347
25 changed files with 470 additions and 146 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java
@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo {
    }

    private boolean validQual(final byte result) {
-        return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
+        return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE;
    }
 }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
@ -267,7 +267,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
    //
    // -------------------------------------------------------------------------------------

-    static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
+    static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_SAM_QUAL_SCORE +1][BaseUtils.BASES.length+1][QualityUtils.MAX_SAM_QUAL_SCORE +1][MAX_PLOIDY];

    protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
        return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -123,7 +124,7 @@ public class ErrorModel  {
            }
        }

-        double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
+        double p = QualityUtils.qualToErrorProbLog10((byte)(maxQualityScore-minQualityScore));
        if (refSamplePileup == null || refSampleVC == null  || !hasCalledAlleles) {
            for (byte q=minQualityScore; q<=maxQualityScore; q++) {
                // maximum uncertainty if there's no ref data at site
@ -270,7 +271,7 @@ public class ErrorModel  {
    })
    private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) {
        // same as   log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows
-        double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage;
+        double lambda = QualityUtils.qualToErrorProb(q) * (double )coverage;
        // log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k)
        return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches);
    }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -613,7 +613,7 @@ public class UnifiedGenotyperEngine {
            P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
        }

-        return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
+        return new VariantCallContext(vc, QualityUtils.phredScaleCorrectRate(P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
    }

    protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.samples.Sample;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.help.HelpConstants;
 import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
@ -395,7 +396,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa

            int phredScoreTransmission = -1;
            if(transmissionProb != NO_TRANSMISSION_PROB){
-                double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
+                double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
                phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
            }
           //Handle null, missing and unavailable genotypes
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@ -1017,7 +1017,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
            // Determine the phase at this position:
            this.maxEntry = hapTable.maxEntry();

-            // convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
+            // convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.trueProbToQual(posteriorProb):
            PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
            for (PhasingTable.PhasingTableEntry pte : hapTable) {
                if (pte != maxEntry)
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@ -52,7 +52,6 @@ import net.sf.samtools.SAMUtils;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
-import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -174,7 +173,7 @@ public class BaseRecalibration {
                        double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );

                        // recalibrated quality is bound between 1 and MAX_QUAL
-                        final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
+                        final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), RecalDatum.MAX_RECALIBRATED_Q_SCORE);

                        // return the quantized version of the recalibrated quality
                        final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java
@ -162,7 +162,7 @@ public class QualQuantizer {
            "nObservations >= 0",
            "nErrors >= 0",
            "nErrors <= nObservations",
-            "fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE",
+            "fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE",
            "mergeOrder >= 0"})
    protected final class QualInterval implements Comparable<QualInterval> {
        final int qStart, qEnd, fixedQual, level;
@ -224,10 +224,10 @@ public class QualQuantizer {
        /**
         * @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
         */
-        @Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
+        @Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE")
        public byte getQual() {
            if ( ! hasFixedQual() )
-                return QualityUtils.probToQual(1-getErrorRate(), 0);
+                return QualityUtils.errorProbToQual(getErrorRate());
            else
                return (byte)fixedQual;
        }
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
@ -76,7 +76,7 @@ public class QuantizationInfo {
    }
    
    public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
-        final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
+        final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution
        for (int i = 0; i < qualHistogram.length; i++)
            qualHistogram[i] = 0L;

@ -100,7 +100,7 @@ public class QuantizationInfo {
    }

    public void noQuantization() {
-        this.quantizationLevels = QualityUtils.MAX_QUAL_SCORE;
+        this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE;
        for (int i = 0; i < this.quantizationLevels; i++)
            quantizedQuals.set(i, (byte) i);
    }
@ -124,7 +124,7 @@ public class QuantizationInfo {
        quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
        quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);

-        for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
+        for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) {
            quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
            quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
            quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
@ -74,10 +74,10 @@ package org.broadinstitute.sting.utils.recalibration;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Invariant;
 import com.google.java.contract.Requires;
+import net.sf.samtools.SAMUtils;
 import org.apache.commons.math.optimization.fitting.GaussianFunction;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;


 /**
@ -100,6 +100,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
        "numMismatches <= numObservations"
 })
 public class RecalDatum {
+    public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
    private static final double UNINITIALIZED = -1.0;

    /**
@ -337,7 +338,7 @@ public class RecalDatum {
        // This is the old and busted point estimate approach:
        //final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());

-        empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
+        empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE);
    }

    //static final boolean DEBUG = false;
@ -369,7 +370,12 @@ public class RecalDatum {
        return Qemp;
    }

-    static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
+    /**
+     * Quals above this value should be capped down to this value (because they are too high)
+     * in the base quality score recalibrator
+     */
+    public final static byte MAX_GATK_USABLE_Q_SCORE = 40;
+    static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1];
    static {
        // f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
        // Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
@ -379,7 +385,7 @@ public class RecalDatum {
        final double GF_d = 0.5;   // with these parameters, deltas can shift at most ~20 Q points

        final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
-        for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
+        for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) {
            double log10Prior = Math.log10(gaussian.value((double) i));
            if ( Double.isInfinite(log10Prior) )
                log10Prior = -Double.MAX_VALUE;
@ -388,7 +394,7 @@ public class RecalDatum {
    }

    static protected double log10QempPrior(final double Qempirical, final double Qreported) {
-        final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
+        final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE);
        //if ( DEBUG )
        //    System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
        return log10QempPriorCache[difference];
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
@ -263,11 +263,11 @@ public class RecalibrationReport {
     * Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
     *
     * @param table the GATKReportTable containing the quantization mappings
-     * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
+     * @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE
     */
    private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
-        final Byte[] quals  = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
-        final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
+        final Byte[] quals  = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
+        final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
        for ( int i = 0; i < table.getNumRows(); i++ ) {
            final byte originalQual = (byte)i;
            final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
--- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
@ -119,6 +119,6 @@ public class QualityScoreCovariate implements RequiredCovariate {

    @Override
    public int maximumKeyValue() {
-        return QualityUtils.MAX_QUAL_SCORE;
+        return QualityUtils.MAX_SAM_QUAL_SCORE;
    }
 }
--- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java
@ -90,7 +90,7 @@ public class QualQuantizerUnitTest extends BaseTest {
            this.exError = exError;
            this.exTotal = exTotal;
            this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1));
-            this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0);
+            this.exQual = QualityUtils.errorProbToQual(this.exErrorRate);
        }
    }

--- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java
@ -50,7 +50,6 @@ package org.broadinstitute.sting.utils.recalibration;
 // the imports for unit testing.


-import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
@ -58,7 +57,6 @@ import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;

@ -207,8 +205,8 @@ public class RecalDatumUnitTest extends BaseTest {

    @Test
    public void testlog10QempPrior() {
-        for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
-            for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
+        for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
+            for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) {
                final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
                Assert.assertTrue(log10prior < 0.0);
                Assert.assertFalse(Double.isInfinite(log10prior));
@ -219,7 +217,7 @@ public class RecalDatumUnitTest extends BaseTest {
        final int Qrep = 20;
        int maxQemp = -1;
        double maxQempValue = -Double.MAX_VALUE;
-        for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
+        for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
            final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
            if ( log10prior > maxQempValue ) {
                maxQemp = Qemp;
--- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
@ -67,7 +67,7 @@ public class RecalibrationReportUnitTest {
        final Random random = new Random();
        final int nObservations = random.nextInt(maxObservations);
        final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
-        final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
+        final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE);
        return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
    }

@ -75,10 +75,10 @@ public class RecalibrationReportUnitTest {
    public void testOutput() {
        final int length = 100;

-        List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_QUAL_SCORE + 1);
-        List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_QUAL_SCORE + 1);
+        List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
+        List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);

-        for (int i = 0;  i<= QualityUtils.MAX_QUAL_SCORE; i++) {
+        for (int i = 0;  i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
            quals.add((byte) i);
            counts.add(1L);
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java
@ -200,7 +200,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
            final int mismatches = (Integer)table.get(key, "mismatches");
            final int count = (Integer)table.get(key, "counts");
            final double errorRate = (mismatches + 1) / (1.0*(count + 1));
-            final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
+            final int qual = QualityUtils.errorProbToQual(errorRate);
            table.set(key, "qual", qual);
            table.set(key, "errorrate", errorRate);
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java
@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.Window;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.help.HelpConstants;
+import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
 import org.broadinstitute.variant.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -408,7 +409,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
            return genotype.getGQ() >= minGenotypeQuality;
        } else if ( genotype.hasLikelihoods() ) {
            double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector());
-            return MathUtils.log10ProbabilityToPhredScale(log10gq) >= minGenotypeQuality;
+            return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= minGenotypeQuality;
        }

        return minGenotypeQuality <= 0;
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@ -1251,6 +1251,16 @@ public class MathUtils {
        return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
    }

+    /**
+     * Checks that the result is a well-formed probability
+     *
+     * @param result a supposedly well-formed probability value
+     * @return true if result is really well formed
+     */
+    public static boolean goodProbability(final double result) {
+        return result >= 0.0 && result <= 1.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
+    }
+
    /**
     * A utility class that computes on the fly average and standard deviation for a stream of numbers.
     * The number of observations does not have to be known in advance, and can be also very big (so that
@ -1343,28 +1353,6 @@ public class MathUtils {
        return Math.max(a, x2);
    }

-    public static double phredScaleToProbability(byte q) {
-        return Math.pow(10, (-q) / 10.0);
-    }
-
-    public static double phredScaleToLog10Probability(byte q) {
-        return ((-q) / 10.0);
-    }
-
-    /**
-     * Returns the phred scaled value of probability p
-     *
-     * @param p probability (between 0 and 1).
-     * @return phred scaled probability of p
-     */
-    public static byte probabilityToPhredScale(double p) {
-        return (byte) ((-10) * Math.log10(p));
-    }
-
-    public static double log10ProbabilityToPhredScale(double log10p) {
-        return (-10) * log10p;
-    }
-
    /**
     * Converts LN to LOG10
     *
@ -1774,4 +1762,24 @@ public class MathUtils {

        return values;
    }
+
+    /**
+     * Compute in a numerical correct way the quanity log10(1-x)
+     *
+     * Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow
+     * in 1-x when x is very small
+     *
+     * @param x a positive double value between 0.0 and 1.0
+     * @return an estimate of log10(1-x)
+     */
+    @Requires("x >= 0.0 && x <= 1.0")
+    @Ensures("result <= 0.0")
+    public static double log10OneMinusX(final double x) {
+        if ( x == 1.0 )
+            return Double.NEGATIVE_INFINITY;
+        else if ( x == 0.0 )
+            return 0.0;
+        else
+            return Math.log10(1 / x - 1) + Math.log10(x);
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@ -25,38 +25,50 @@

 package org.broadinstitute.sting.utils;

+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
 import net.sf.samtools.SAMUtils;

 /**
 * QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating
 * quality scores.
 *
- * @author Kiran Garimella
+ * @author Kiran Garimella, Mark DePristo
+ * @since Way back
 */
 public class QualityUtils {
-    public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
-    public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
-    public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
+    /**
+     * Maximum quality score that can be encoded in a SAM/BAM file
+     */
+    public final static byte MAX_SAM_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;

-    public final static double MIN_REASONABLE_ERROR = 0.0001;
-    public final static byte MAX_REASONABLE_Q_SCORE = 60;  // bams containing quals above this value are extremely suspicious and we should warn the user
-    public final static byte MAX_GATK_USABLE_Q_SCORE = 40; // quals above this value should be capped down to this value (because they are too high)
+
+    private final static double RAW_MIN_PHRED_SCALED_QUAL = Math.log10(Double.MIN_VALUE);
+    protected final static double MIN_PHRED_SCALED_QUAL = -10.0 * RAW_MIN_PHRED_SCALED_QUAL;
+
+    /**
+     * bams containing quals above this value are extremely suspicious and we should warn the user
+     */
+    public final static byte MAX_REASONABLE_Q_SCORE = 60;
+
+    /**
+     * The lowest quality score for a base that is considered reasonable for statistical analysis.  This is
+     * because Q 6 => you stand a 25% of being right, which means all bases are equally likely
+     */
    public final static byte MIN_USABLE_Q_SCORE = 6;
    public final static int MAPPING_QUALITY_UNAVAILABLE = 255;

+    /**
+     * Cached values for qual as byte calculations so they are very fast
+     */
    private static double qualToErrorProbCache[] = new double[256];
-    static {
-        for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
-    }
-
-    private static double qualToErrorProbLog10Cache[] = new double[256];
-    static {
-        for (int i = 0; i < 256; i++) qualToErrorProbLog10Cache[i] = qualToErrorProbLog10Raw(i);
-    }
-
    private static double qualToProbLog10Cache[] = new double[256];
+
    static {
-        for (int i = 0; i < 256; i++) qualToProbLog10Cache[i] = qualToProbLog10Raw(i);
+        for (int i = 0; i < 256; i++) {
+            qualToErrorProbCache[i] = qualToErrorProb((double) i);
+            qualToProbLog10Cache[i] = Math.log10(1.0 - qualToErrorProbCache[i]);
+        }
    }

    /**
@ -64,111 +76,301 @@ public class QualityUtils {
     */
    private QualityUtils() {}

+    // ----------------------------------------------------------------------
+    //
+    // These are all functions to convert a phred-scaled quality score to a probability
+    //
+    // ----------------------------------------------------------------------
+
    /**
-     * Convert a quality score to a probability.  This is the Phred-style
-     * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
+     * Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
+     *
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * Because the input is a discretized byte value, this function uses a cache so is very efficient
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
     *
     * @param qual a quality score (0-255)
     * @return a probability (0.0-1.0)
     */
-    static public double qualToProb(byte qual) {
+    @Ensures("result >= 0.0 && result <= 1.0")
+    public static double qualToProb(final byte qual) {
        return 1.0 - qualToErrorProb(qual);
    }

-    static public double qualToProb(double qual) {
-        return 1.0 - Math.pow(10.0, qual/(-10.0));
+    /**
+     * Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
+     *
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * Because the input is a double value, this function must call Math.pow so can be quite expensive
+     *
+     * @param qual a phred-scaled quality score encoded as a double.  Can be non-integer values (30.5)
+     * @return a probability (0.0-1.0)
+     */
+    @Requires("qual >= 0.0")
+    @Ensures("result >= 0.0 && result <= 1.0")
+    public static double qualToProb(final double qual) {
+        return 1.0 - qualToErrorProb(qual);
    }

-    static private double qualToProbLog10Raw(int qual) {
-        return Math.log10(1.0 - qualToErrorProbRaw(qual));
-    }
-
-    static public double qualToProbLog10(byte qual) {
+    /**
+     * Convert a phred-scaled quality score to its log10 probability of being true (Q30 => log10(0.999))
+     *
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * Because the input is a double value, this function must call Math.pow so can be quite expensive
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param qual a phred-scaled quality score encoded as a double.  Can be non-integer values (30.5)
+     * @return a probability (0.0-1.0)
+     */
+    @Ensures("result <= 0.0")
+    public static double qualToProbLog10(final byte qual) {
        return qualToProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
    }

    /**
-     * Convert a quality score to a probability of error.  This is the Phred-style
-     * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
+     * Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
     *
-     * @param qual a quality score (0 - 255)
-     * @return a probability (0.0 - 1.0)
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * Because the input is a double value, this function must call Math.pow so can be quite expensive
+     *
+     * @param qual a phred-scaled quality score encoded as a double.  Can be non-integer values (30.5)
+     * @return a probability (0.0-1.0)
     */
-    static private double qualToErrorProbRaw(int qual) {
-        return qualToErrorProb((double) qual);
-    }
-
+    @Requires("qual >= 0.0")
+    @Ensures("result >= 0.0 && result <= 1.0")
    public static double qualToErrorProb(final double qual) {
-        return Math.pow(10.0, qual/-10.0);
+        return Math.pow(10.0, qual / -10.0);
    }

-
-    static public double qualToErrorProb(byte qual) {
+    /**
+     * Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
+     *
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * Because the input is a byte value, this function uses a cache so is very efficient
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param qual a phred-scaled quality score encoded as a byte
+     * @return a probability (0.0-1.0)
+     */
+    @Ensures("result >= 0.0 && result <= 1.0")
+    public static double qualToErrorProb(final byte qual) {
        return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
    }

-    static private double qualToErrorProbLog10Raw(int qual) {
-        return ((double) qual)/-10.0;
-    }

-    static public double qualToErrorProbLog10(byte qual) {
-        return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
-    }
-
-    static public double qualToErrorProbLog10(final double qual) {
-        return qual/-10.0;
+    /**
+     * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
+     *
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * The calculation is extremely efficient
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param qual a phred-scaled quality score encoded as a byte
+     * @return a probability (0.0-1.0)
+     */
+    @Ensures("result <= 0.0")
+    public static double qualToErrorProbLog10(final byte qual) {
+        return qualToErrorProbLog10((double)(qual & 0xFF));
    }

    /**
-     * Convert a probability to a quality score.  Note, this is capped at Q40.
+     * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
     *
-     * @param prob a probability (0.0-1.0)
-     * @return a quality score (0-40)
+     * This is the Phred-style conversion, *not* the Illumina-style conversion.
+     *
+     * The calculation is extremely efficient
+     *
+     * @param qual a phred-scaled quality score encoded as a double
+     * @return a probability (0.0-1.0)
     */
-    static public byte probToQual(double prob) {
-        return probToQual(prob, MIN_REASONABLE_ERROR);
-        //return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001));
+    @Ensures("result <= 0.0")
+    public static double qualToErrorProbLog10(final double qual) {
+        return qual / -10.0;
+    }
+
+    // ----------------------------------------------------------------------
+    //
+    // Functions to convert a probability to a phred-scaled quality score
+    //
+    // ----------------------------------------------------------------------
+
+    /**
+     * Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
+     *
+     * Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
+     * and by 1 at the low-end.
+     *
+     * @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
+     * @return a quality score (0-MAX_SAM_QUAL_SCORE)
+     */
+    @Requires("errorRate >= 0.0 && errorRate <= 1.0")
+    public static byte errorProbToQual(final double errorRate) {
+        return errorProbToQual(errorRate, MAX_SAM_QUAL_SCORE);
    }

    /**
-     * Convert a probability to a quality score.  Note, this is capped at a quality score which is determined by _eps_.
+     * Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
     *
-     * @param prob a probability (0.0-1.0)
-     * @param eps min probabilty allowed (0.0-1.0)
-     * @return a quality score (0-255)
+     * Note, this function caps the resulting quality score by the public static value MIN_REASONABLE_ERROR
+     * and by 1 at the low-end.
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
+     * @return a quality score (0-maxQual)
     */
-    static public byte probToQual(double prob, double eps) {
-        double lp = Math.round(-10.0*Math.log10(1.0 - prob + eps));
-        //System.out.printf("LP is %f, byte is %d%n", lp, b);
-        return boundQual((int)lp);
-    }
-
-    static public double phredScaleCorrectRate(double trueRate) {
-        return phredScaleErrorRate(1-trueRate);
-    }
-
-    static public double phredScaleErrorRate(double errorRate) {
-        return Math.abs(-10.0*Math.log10(errorRate));
+    @Requires("errorRate >= 0.0 && errorRate <= 1.0")
+    public static byte errorProbToQual(final double errorRate, final byte maxQual) {
+        final double d = Math.round(-10.0*Math.log10(errorRate));
+        return boundQual((int)d, maxQual);
    }

    /**
-     * Return a quality score, capped at max qual.
-     *
-     * @param qual  the uncapped quality score
-     * @return the capped quality score
+     * @see #errorProbToQual(double, byte) with proper conversion of maxQual integer to a byte
     */
-    static public byte boundQual(int qual) {
-        return boundQual(qual, MAX_QUAL_SCORE);
+    @Requires("maxQual >= 0 && maxQual < 255")
+    public static byte errorProbToQual(final double prob, final int maxQual) {
+        return errorProbToQual(prob, (byte)(maxQual & 0xFF));
    }

    /**
-     * Returns an integer quality score bounded by 1 - maxQual.
+     * Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
     *
-     * @param qual    the quality score
-     * @param maxQual the maximum quality
-     * @return the integer betwen 1 and maxqual.
+     * Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
+     * and by 1 at the low-end.
+     *
+     * @param prob a probability (0.0-1.0) of being right
+     * @return a quality score (0-MAX_SAM_QUAL_SCORE)
     */
-    static public byte boundQual(int qual, byte maxQual) {
-        return (byte) Math.max(Math.min(qual, maxQual), 1);
+    @Requires("prob >= 0.0 && prob <= 1.0")
+    public static byte trueProbToQual(final double prob) {
+        return trueProbToQual(prob, MAX_SAM_QUAL_SCORE);
+    }
+
+    /**
+     * Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
+     *
+     * Note, this function caps the resulting quality score by the min probability allowed (EPS).
+     * So for example, if prob is 1e-6, which would imply a Q-score of 60, and EPS is 1e-4,
+     * the result of this function is actually Q40.
+     *
+     * Note that the resulting quality score, regardless of EPS, is capped by MAX_SAM_QUAL_SCORE and
+     * bounded on the low-side by 1.
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param prob a probability (0.0-1.0) of being right
+     * @param maxQual the maximum quality score we are allowed to emit here, regardless of the error rate
+     * @return a phred-scaled quality score (0-maxQualScore) as a byte
+     */
+    @Requires({
+            "prob >= 0.0 && prob <= 1.0"
+    })
+    @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
+    public static byte trueProbToQual(final double prob, final byte maxQual) {
+        final double lp = Math.round(-10.0*MathUtils.log10OneMinusX(prob));
+        return boundQual((int)lp, maxQual);
+    }
+
+    /**
+     * @see #trueProbToQual(double, byte) with proper conversion of maxQual to a byte
+     */
+    @Requires("maxQual >= 0 && maxQual < 255")
+    public static byte trueProbToQual(final double prob, final int maxQual) {
+        return trueProbToQual(prob, (byte)(maxQual & 0xFF));
+    }
+
+    /**
+     * Convert a probability of being right to a phred-scaled quality score of being wrong as a double
+     *
+     * This is a very generic method, that simply computes a phred-scaled double quality
+     * score given an error rate.  It has the same precision as a normal double operation
+     *
+     * @param trueRate the probability of being right (0.0-1.0)
+     * @return a phred-scaled version of the error rate implied by trueRate
+     */
+    @Requires("MathUtils.goodProbability(trueRate)")
+    @Ensures("result >= 0.0")
+    public static double phredScaleCorrectRate(final double trueRate) {
+        return phredScaleLog10ErrorRate(MathUtils.log10OneMinusX(trueRate));
+    }
+
+    /**
+     * Convert a probability of being wrong to a phred-scaled quality score of being wrong as a double
+     *
+     * This is a very generic method, that simply computes a phred-scaled double quality
+     * score given an error rate.  It has the same precision as a normal double operation
+     *
+     * @param errorRate the probability of being wrong (0.0-1.0)
+     * @return a phred-scaled version of the error rate
+     */
+    @Requires("MathUtils.goodProbability(errorRate)")
+    @Ensures("result >= 0.0")
+    public static double phredScaleErrorRate(final double errorRate) {
+        return phredScaleLog10ErrorRate(Math.log10(errorRate));
+    }
+
+    /**
+     * Convert a log10 probability of being wrong to a phred-scaled quality score of being wrong as a double
+     *
+     * This is a very generic method, that simply computes a phred-scaled double quality
+     * score given an error rate.  It has the same precision as a normal double operation
+     *
+     * @param errorRateLog10 the log10 probability of being wrong (0.0-1.0)
+     * @return a phred-scaled version of the error rate
+     */
+    @Ensures("result >= 0.0")
+    public static double phredScaleLog10ErrorRate(final double errorRateLog10) {
+        return -10.0 * Math.max(errorRateLog10, RAW_MIN_PHRED_SCALED_QUAL);
+    }
+
+    // ----------------------------------------------------------------------
+    //
+    // Routines to bound a quality score to a reasonable range
+    //
+    // ----------------------------------------------------------------------
+
+    /**
+     * Return a quality score that bounds qual by MAX_SAM_QUAL_SCORE and 1
+     *
+     * @param qual the uncapped quality score as an integer
+     * @return the bounded quality score
+     */
+    @Requires("qual >= 0")
+    @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (MAX_SAM_QUAL_SCORE & 0xFF)")
+    public static byte boundQual(int qual) {
+        return boundQual(qual, MAX_SAM_QUAL_SCORE);
+    }
+
+    /**
+     * Return a quality score that bounds qual by maxQual and 1
+     *
+     * WARNING -- because this function takes a byte for maxQual, you must be careful in converting
+     * integers to byte.  The appropriate way to do this is ((byte)(myInt & 0xFF))
+     *
+     * @param qual the uncapped quality score as an integer
+     * @param maxQual the maximum quality score, must be less < 255
+     * @return the bounded quality score
+     */
+    @Requires({"qual >= 0"})
+    @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
+    public static byte boundQual(final int qual, final byte maxQual) {
+        return (byte) (Math.max(Math.min(qual, maxQual & 0xFF), 1) & 0xFF);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java
@ -94,8 +94,7 @@ public class DupUtils {

        Arrays.sort(probs);
        double normalizedP = Math.pow(10, bestProb) / sumProbs;
-        double eps = Math.pow(10, -maxQScore/10.0);
-        byte qual = QualityUtils.probToQual(normalizedP, eps);
+        byte qual = QualityUtils.trueProbToQual(normalizedP, maxQScore);
 //        if ( false ) {
 //            System.out.printf("Best base is %s %.8f%n", bestBase, bestProb);
 //            System.out.printf("2nd  base is %.8f%n", probs[1]);
--- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java
+++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java
@ -129,7 +129,7 @@ public class LIBSPerformance extends CommandLineProgram {
 //            read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
 //            final byte[] quals = new byte[readLength];
 //            for ( int i = 0; i < readLength; i++ )
-//                quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
+//                quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
 //            read.setBaseQualities(quals);
 //            read.setCigarString(cigar);
 //
--- a/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java
@ -34,6 +34,7 @@ package org.broadinstitute.sting.utils;
 import org.broadinstitute.sting.BaseTest;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

 import java.util.*;
@ -42,10 +43,64 @@ import java.util.*;
 * Basic unit test for QualityUtils class
 */
 public class QualityUtilsUnitTest extends BaseTest {
+    final private static double TOLERANCE = 1e-9;
+
    @BeforeClass
    public void init() {
    }

+    @DataProvider(name = "QualTest")
+    public Object[][] makeMyDataProvider() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        for ( int qual = 0; qual < 255; qual++ ) {
+            tests.add(new Object[]{(byte)(qual & 0xFF), Math.pow(10.0, ((double)qual)/-10.0)});
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    /**
+     * Example testng test using MyDataProvider
+     */
+    @Test(dataProvider = "QualTest")
+    public void testMyData(final byte qual, final double errorRate) {
+        final double trueRate = 1 - errorRate;
+
+        final double actualErrorRate = QualityUtils.qualToErrorProb(qual);
+        Assert.assertEquals(actualErrorRate, errorRate, TOLERANCE);
+        final double actualTrueRate = QualityUtils.qualToProb(qual);
+        Assert.assertEquals(actualTrueRate, trueRate, TOLERANCE);
+
+        // log10 tests
+        final double actualLog10ErrorRate = QualityUtils.qualToErrorProbLog10(qual);
+        Assert.assertEquals(actualLog10ErrorRate, Math.log10(errorRate), TOLERANCE);
+        final double actualLog10TrueRate = QualityUtils.qualToProbLog10(qual);
+        Assert.assertEquals(actualLog10TrueRate, Math.log10(trueRate), TOLERANCE);
+
+        // test that we can convert our error rates to quals, accounting for boundaries
+        final int expectedQual = Math.max(Math.min(qual & 0xFF, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
+        final byte actualQual = QualityUtils.trueProbToQual(trueRate);
+        Assert.assertEquals(actualQual, expectedQual & 0xFF);
+        final byte actualQualFromErrorRate = QualityUtils.errorProbToQual(errorRate);
+        Assert.assertEquals(actualQualFromErrorRate, expectedQual & 0xFF);
+
+        for ( int maxQual = 10; maxQual < QualityUtils.MAX_SAM_QUAL_SCORE; maxQual++ ) {
+            final byte maxAsByte = (byte)(maxQual & 0xFF);
+            final byte expectedQual2 = (byte)(Math.max(Math.min(qual & 0xFF, maxQual), 1) & 0xFF);
+            final byte actualQual2 = QualityUtils.trueProbToQual(trueRate, maxAsByte);
+            Assert.assertEquals(actualQual2, expectedQual2, "Failed with max " + maxQual);
+            final byte actualQualFromErrorRate2 = QualityUtils.errorProbToQual(errorRate, maxAsByte);
+            Assert.assertEquals(actualQualFromErrorRate2, expectedQual2, "Failed with max " + maxQual);
+
+            // test the integer routines
+            final byte actualQualInt2 = QualityUtils.trueProbToQual(trueRate, maxQual);
+            Assert.assertEquals(actualQualInt2, expectedQual2, "Failed with max " + maxQual);
+            final byte actualQualFromErrorRateInt2 = QualityUtils.errorProbToQual(errorRate, maxQual);
+            Assert.assertEquals(actualQualFromErrorRateInt2, expectedQual2, "Failed with max " + maxQual);
+        }
+    }
+
    @Test
    public void testQualCaches() {
        Assert.assertEquals(QualityUtils.qualToErrorProb((byte) 20), 0.01, 1e-6);
@ -63,4 +118,59 @@ public class QualityUtilsUnitTest extends BaseTest {
        Assert.assertEquals(QualityUtils.qualToProb((byte) 40), 0.9999, 1e-6);
        Assert.assertEquals(QualityUtils.qualToProbLog10((byte) 40), -4.34316198e-5, 1e-6);
    }
+
+    @Test()
+    public void testBoundingDefault() {
+        for ( int qual = 0; qual < 1000; qual++ ) {
+            final byte expected = (byte)Math.max(Math.min(qual, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
+            Assert.assertEquals(QualityUtils.boundQual(qual), expected);
+        }
+    }
+
+    @Test()
+    public void testBoundingWithMax() {
+        for ( int max = 10; max < 255; max += 50 ) {
+            for ( int qual = 0; qual < 1000; qual++ ) {
+                final int expected = Math.max(Math.min(qual, max), 1);
+                Assert.assertEquals(QualityUtils.boundQual(qual, (byte)(max & 0xFF)) & 0xFF, expected & 0xFF, "qual " + qual + " max " + max);
+            }
+        }
+    }
+
+    @DataProvider(name = "PhredScaleDoubleOps")
+    public Object[][] makePhredDoubleTest() {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        tests.add(new Object[]{0.0, -10 * Math.log10(Double.MIN_VALUE)});
+        tests.add(new Object[]{1.0, 0.0});
+        for ( int pow = 1; pow < 20; pow++ ) {
+            tests.add(new Object[]{Math.pow(10.0, -1.0 * pow), pow * 10});
+            tests.add(new Object[]{Math.pow(10.0, -1.5 * pow), pow * 15});
+        }
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test()
+    public void testQualToErrorProbDouble() {
+        for ( double qual = 3.0; qual < 255.0; qual += 0.1 ) {
+            final double expected = Math.pow(10.0, qual / -10.0);
+            Assert.assertEquals(QualityUtils.qualToErrorProb(qual), expected, TOLERANCE, "failed qual->error prob for double qual " + qual);
+        }
+    }
+
+
+    @Test(dataProvider = "PhredScaleDoubleOps")
+    public void testPhredScaleDoubleOps(final double errorRate, final double expectedPhredScaled) {
+        final double actualError = QualityUtils.phredScaleErrorRate(errorRate);
+        Assert.assertEquals(actualError, expectedPhredScaled, TOLERANCE);
+        final double trueRate = 1 - errorRate;
+        final double actualTrue = QualityUtils.phredScaleCorrectRate(trueRate);
+        if ( trueRate == 1.0 ) {
+            Assert.assertEquals(actualTrue, QualityUtils.MIN_PHRED_SCALED_QUAL);
+        } else {
+            final double tol = errorRate < 1e-10 ? 10.0 : 1e-3;
+            Assert.assertEquals(actualTrue, expectedPhredScaled, tol);
+        }
+    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java
@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator;
 import com.google.caliper.Param;
 import com.google.caliper.SimpleBenchmark;
 import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.QualityUtils;
@ -63,7 +62,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
            read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
            final byte[] quals = new byte[readLength];
            for ( int i = 0; i < readLength; i++ )
-                quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
+                quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
            read.setBaseQualities(quals);
            read.setCigarString(cigar);
            reads.add(read);
--- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java
@ -141,7 +141,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
            read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
            final byte[] quals = new byte[readLength];
            for ( int i = 0; i < readLength; i++ )
-                quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
+                quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
            read.setBaseQualities(quals);
            read.setCigarString(cigarString);
            return read;
--- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java
@ -315,7 +315,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
                read.setReadBases(("TT" + eventBases + "A").getBytes());
                final byte[] quals = new byte[readLength];
                for ( int i = 0; i < readLength; i++ )
-                    quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
+                    quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
                read.setBaseQualities(quals);
                read.setCigarString(cigar);