diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java index 83d5bb29b..94d1c5501 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java @@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo { } private boolean validQual(final byte result) { - return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE; + return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 2baa89999..941b11b36 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -267,7 +267,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { // // ------------------------------------------------------------------------------------- - static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY]; + static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_SAM_QUAL_SCORE +1][BaseUtils.BASES.length+1][QualityUtils.MAX_SAM_QUAL_SCORE +1][MAX_PLOIDY]; protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) { return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index 1b004d889..49494ebb0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -123,7 +124,7 @@ public class ErrorModel { } } - double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); + double p = QualityUtils.qualToErrorProbLog10((byte)(maxQualityScore-minQualityScore)); if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) { for (byte q=minQualityScore; q<=maxQualityScore; q++) { // maximum uncertainty if there's no ref data at site @@ -270,7 +271,7 @@ public class ErrorModel { }) private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) { // same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows - double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage; + double lambda = QualityUtils.qualToErrorProb(q) * (double )coverage; // log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k) return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 19d218023..4cfd8c7bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -613,7 +613,7 @@ public class UnifiedGenotyperEngine { P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth); } - return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false); + return new VariantCallContext(vc, QualityUtils.phredScaleCorrectRate(P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false); } protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 21f2bd8db..54a324411 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; @@ -395,7 +396,7 @@ public class PhaseByTransmission extends RodWalker, HashMa int phredScoreTransmission = -1; if(transmissionProb != NO_TRANSMISSION_PROB){ - double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb))); + double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb))); phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE; } //Handle null, missing and unavailable genotypes diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index e8388a3d7..7f2cdd3d0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -1017,7 +1017,7 @@ public class ReadBackedPhasing extends RodWalker= 0", "nErrors >= 0", "nErrors <= nObservations", - "fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE", + "fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE", "mergeOrder >= 0"}) protected final class QualInterval implements Comparable { final int qStart, qEnd, fixedQual, level; @@ -224,10 +224,10 @@ public class QualQuantizer { /** * @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual. */ - @Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE") + @Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE") public byte getQual() { if ( ! hasFixedQual() ) - return QualityUtils.probToQual(1-getErrorRate(), 0); + return QualityUtils.errorProbToQual(getErrorRate()); else return (byte)fixedQual; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index eb4f61266..464390b99 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -76,7 +76,7 @@ public class QuantizationInfo { } public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { - final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution + final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution for (int i = 0; i < qualHistogram.length; i++) qualHistogram[i] = 0L; @@ -100,7 +100,7 @@ public class QuantizationInfo { } public void noQuantization() { - this.quantizationLevels = QualityUtils.MAX_QUAL_SCORE; + this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE; for (int i = 0; i < this.quantizationLevels; i++) quantizedQuals.set(i, (byte) i); } @@ -124,7 +124,7 @@ public class QuantizationInfo { quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); - for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) { + for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) { quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index be537f294..ea3781204 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -74,10 +74,10 @@ package org.broadinstitute.sting.utils.recalibration; import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; +import net.sf.samtools.SAMUtils; import org.apache.commons.math.optimization.fitting.GaussianFunction; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; /** @@ -100,6 +100,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; "numMismatches <= numObservations" }) public class RecalDatum { + public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE; private static final double UNINITIALIZED = -1.0; /** @@ -337,7 +338,7 @@ public class RecalDatum { // This is the old and busted point estimate approach: //final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate()); - empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); + empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE); } //static final boolean DEBUG = false; @@ -369,7 +370,12 @@ public class RecalDatum { return Qemp; } - static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1]; + /** + * Quals above this value should be capped down to this value (because they are too high) + * in the base quality score recalibrator + */ + public final static byte MAX_GATK_USABLE_Q_SCORE = 40; + static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1]; static { // f(x) = a + b*exp(-((x - c)^2 / (2*d^2))) // Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell". @@ -379,7 +385,7 @@ public class RecalDatum { final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d); - for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) { + for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) { double log10Prior = Math.log10(gaussian.value((double) i)); if ( Double.isInfinite(log10Prior) ) log10Prior = -Double.MAX_VALUE; @@ -388,7 +394,7 @@ public class RecalDatum { } static protected double log10QempPrior(final double Qempirical, final double Qreported) { - final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE); + final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE); //if ( DEBUG ) // System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference])); return log10QempPriorCache[difference]; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e5860b4ad..a3fec6a22 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -263,11 +263,11 @@ public class RecalibrationReport { * Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores * * @param table the GATKReportTable containing the quantization mappings - * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE + * @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE */ private QuantizationInfo initializeQuantizationTable(GATKReportTable table) { - final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; - final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; + final Byte[] quals = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1]; + final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1]; for ( int i = 0; i < table.getNumRows(); i++ ) { final byte originalQual = (byte)i; final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java index 4d5af87a8..46284b27e 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java @@ -119,6 +119,6 @@ public class QualityScoreCovariate implements RequiredCovariate { @Override public int maximumKeyValue() { - return QualityUtils.MAX_QUAL_SCORE; + return QualityUtils.MAX_SAM_QUAL_SCORE; } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java index 8f228c154..696cf846f 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java @@ -90,7 +90,7 @@ public class QualQuantizerUnitTest extends BaseTest { this.exError = exError; this.exTotal = exTotal; this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1)); - this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0); + this.exQual = QualityUtils.errorProbToQual(this.exErrorRate); } } diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java index da78932d1..5b7b95be9 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -50,7 +50,6 @@ package org.broadinstitute.sting.utils.recalibration; // the imports for unit testing. -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -58,7 +57,6 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -207,8 +205,8 @@ public class RecalDatumUnitTest extends BaseTest { @Test public void testlog10QempPrior() { - for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) { - for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) { + for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) { + for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) { final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep); Assert.assertTrue(log10prior < 0.0); Assert.assertFalse(Double.isInfinite(log10prior)); @@ -219,7 +217,7 @@ public class RecalDatumUnitTest extends BaseTest { final int Qrep = 20; int maxQemp = -1; double maxQempValue = -Double.MAX_VALUE; - for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) { + for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) { final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep); if ( log10prior > maxQempValue ) { maxQemp = Qemp; diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index e82f1338a..7d1e51385 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -67,7 +67,7 @@ public class RecalibrationReportUnitTest { final Random random = new Random(); final int nObservations = random.nextInt(maxObservations); final int nErrors = Math.min(random.nextInt(maxErrors), nObservations); - final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); + final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE); return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual); } @@ -75,10 +75,10 @@ public class RecalibrationReportUnitTest { public void testOutput() { final int length = 100; - List quals = new ArrayList(QualityUtils.MAX_QUAL_SCORE + 1); - List counts = new ArrayList(QualityUtils.MAX_QUAL_SCORE + 1); + List quals = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); + List counts = new ArrayList(QualityUtils.MAX_SAM_QUAL_SCORE + 1); - for (int i = 0; i<= QualityUtils.MAX_QUAL_SCORE; i++) { + for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) { quals.add((byte) i); counts.add(1L); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index f361d5e2b..76f5478a4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -200,7 +200,7 @@ public class ErrorRatePerCycle extends LocusWalker { final int mismatches = (Integer)table.get(key, "mismatches"); final int count = (Integer)table.get(key, "counts"); final double errorRate = (mismatches + 1) / (1.0*(count + 1)); - final int qual = QualityUtils.probToQual(1-errorRate, 0.0); + final int qual = QualityUtils.errorProbToQual(errorRate); table.set(key, "qual", qual); table.set(key, "errorrate", errorRate); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index ce9e28c4b..8d16e6ca2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.help.HelpConstants; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -408,7 +409,7 @@ public class VariantsToBinaryPed extends RodWalker { return genotype.getGQ() >= minGenotypeQuality; } else if ( genotype.hasLikelihoods() ) { double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()); - return MathUtils.log10ProbabilityToPhredScale(log10gq) >= minGenotypeQuality; + return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= minGenotypeQuality; } return minGenotypeQuality <= 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 0c3ed87c0..4db55b275 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -1251,6 +1251,16 @@ public class MathUtils { return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result); } + /** + * Checks that the result is a well-formed probability + * + * @param result a supposedly well-formed probability value + * @return true if result is really well formed + */ + public static boolean goodProbability(final double result) { + return result >= 0.0 && result <= 1.0 && ! Double.isInfinite(result) && ! Double.isNaN(result); + } + /** * A utility class that computes on the fly average and standard deviation for a stream of numbers. * The number of observations does not have to be known in advance, and can be also very big (so that @@ -1343,28 +1353,6 @@ public class MathUtils { return Math.max(a, x2); } - public static double phredScaleToProbability(byte q) { - return Math.pow(10, (-q) / 10.0); - } - - public static double phredScaleToLog10Probability(byte q) { - return ((-q) / 10.0); - } - - /** - * Returns the phred scaled value of probability p - * - * @param p probability (between 0 and 1). - * @return phred scaled probability of p - */ - public static byte probabilityToPhredScale(double p) { - return (byte) ((-10) * Math.log10(p)); - } - - public static double log10ProbabilityToPhredScale(double log10p) { - return (-10) * log10p; - } - /** * Converts LN to LOG10 * @@ -1774,4 +1762,24 @@ public class MathUtils { return values; } + + /** + * Compute in a numerical correct way the quanity log10(1-x) + * + * Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow + * in 1-x when x is very small + * + * @param x a positive double value between 0.0 and 1.0 + * @return an estimate of log10(1-x) + */ + @Requires("x >= 0.0 && x <= 1.0") + @Ensures("result <= 0.0") + public static double log10OneMinusX(final double x) { + if ( x == 1.0 ) + return Double.NEGATIVE_INFINITY; + else if ( x == 0.0 ) + return 0.0; + else + return Math.log10(1 / x - 1) + Math.log10(x); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 4519a656b..9dd9b735d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -25,38 +25,50 @@ package org.broadinstitute.sting.utils; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import net.sf.samtools.SAMUtils; /** * QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating * quality scores. * - * @author Kiran Garimella + * @author Kiran Garimella, Mark DePristo + * @since Way back */ public class QualityUtils { - public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE; - public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; - public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE); + /** + * Maximum quality score that can be encoded in a SAM/BAM file + */ + public final static byte MAX_SAM_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; - public final static double MIN_REASONABLE_ERROR = 0.0001; - public final static byte MAX_REASONABLE_Q_SCORE = 60; // bams containing quals above this value are extremely suspicious and we should warn the user - public final static byte MAX_GATK_USABLE_Q_SCORE = 40; // quals above this value should be capped down to this value (because they are too high) + + private final static double RAW_MIN_PHRED_SCALED_QUAL = Math.log10(Double.MIN_VALUE); + protected final static double MIN_PHRED_SCALED_QUAL = -10.0 * RAW_MIN_PHRED_SCALED_QUAL; + + /** + * bams containing quals above this value are extremely suspicious and we should warn the user + */ + public final static byte MAX_REASONABLE_Q_SCORE = 60; + + /** + * The lowest quality score for a base that is considered reasonable for statistical analysis. This is + * because Q 6 => you stand a 25% of being right, which means all bases are equally likely + */ public final static byte MIN_USABLE_Q_SCORE = 6; public final static int MAPPING_QUALITY_UNAVAILABLE = 255; + /** + * Cached values for qual as byte calculations so they are very fast + */ private static double qualToErrorProbCache[] = new double[256]; - static { - for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i); - } - - private static double qualToErrorProbLog10Cache[] = new double[256]; - static { - for (int i = 0; i < 256; i++) qualToErrorProbLog10Cache[i] = qualToErrorProbLog10Raw(i); - } - private static double qualToProbLog10Cache[] = new double[256]; + static { - for (int i = 0; i < 256; i++) qualToProbLog10Cache[i] = qualToProbLog10Raw(i); + for (int i = 0; i < 256; i++) { + qualToErrorProbCache[i] = qualToErrorProb((double) i); + qualToProbLog10Cache[i] = Math.log10(1.0 - qualToErrorProbCache[i]); + } } /** @@ -64,111 +76,301 @@ public class QualityUtils { */ private QualityUtils() {} + // ---------------------------------------------------------------------- + // + // These are all functions to convert a phred-scaled quality score to a probability + // + // ---------------------------------------------------------------------- + /** - * Convert a quality score to a probability. This is the Phred-style - * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same). + * Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999) + * + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * Because the input is a discretized byte value, this function uses a cache so is very efficient + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) * * @param qual a quality score (0-255) * @return a probability (0.0-1.0) */ - static public double qualToProb(byte qual) { + @Ensures("result >= 0.0 && result <= 1.0") + public static double qualToProb(final byte qual) { return 1.0 - qualToErrorProb(qual); } - static public double qualToProb(double qual) { - return 1.0 - Math.pow(10.0, qual/(-10.0)); + /** + * Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999) + * + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * Because the input is a double value, this function must call Math.pow so can be quite expensive + * + * @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5) + * @return a probability (0.0-1.0) + */ + @Requires("qual >= 0.0") + @Ensures("result >= 0.0 && result <= 1.0") + public static double qualToProb(final double qual) { + return 1.0 - qualToErrorProb(qual); } - static private double qualToProbLog10Raw(int qual) { - return Math.log10(1.0 - qualToErrorProbRaw(qual)); - } - - static public double qualToProbLog10(byte qual) { + /** + * Convert a phred-scaled quality score to its log10 probability of being true (Q30 => log10(0.999)) + * + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * Because the input is a double value, this function must call Math.pow so can be quite expensive + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5) + * @return a probability (0.0-1.0) + */ + @Ensures("result <= 0.0") + public static double qualToProbLog10(final byte qual) { return qualToProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. } /** - * Convert a quality score to a probability of error. This is the Phred-style - * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same). + * Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001) * - * @param qual a quality score (0 - 255) - * @return a probability (0.0 - 1.0) + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * Because the input is a double value, this function must call Math.pow so can be quite expensive + * + * @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5) + * @return a probability (0.0-1.0) */ - static private double qualToErrorProbRaw(int qual) { - return qualToErrorProb((double) qual); - } - + @Requires("qual >= 0.0") + @Ensures("result >= 0.0 && result <= 1.0") public static double qualToErrorProb(final double qual) { - return Math.pow(10.0, qual/-10.0); + return Math.pow(10.0, qual / -10.0); } - - static public double qualToErrorProb(byte qual) { + /** + * Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001) + * + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * Because the input is a byte value, this function uses a cache so is very efficient + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param qual a phred-scaled quality score encoded as a byte + * @return a probability (0.0-1.0) + */ + @Ensures("result >= 0.0 && result <= 1.0") + public static double qualToErrorProb(final byte qual) { return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. } - static private double qualToErrorProbLog10Raw(int qual) { - return ((double) qual)/-10.0; - } - static public double qualToErrorProbLog10(byte qual) { - return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. - } - - static public double qualToErrorProbLog10(final double qual) { - return qual/-10.0; + /** + * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001)) + * + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * The calculation is extremely efficient + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param qual a phred-scaled quality score encoded as a byte + * @return a probability (0.0-1.0) + */ + @Ensures("result <= 0.0") + public static double qualToErrorProbLog10(final byte qual) { + return qualToErrorProbLog10((double)(qual & 0xFF)); } /** - * Convert a probability to a quality score. Note, this is capped at Q40. + * Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001)) * - * @param prob a probability (0.0-1.0) - * @return a quality score (0-40) + * This is the Phred-style conversion, *not* the Illumina-style conversion. + * + * The calculation is extremely efficient + * + * @param qual a phred-scaled quality score encoded as a double + * @return a probability (0.0-1.0) */ - static public byte probToQual(double prob) { - return probToQual(prob, MIN_REASONABLE_ERROR); - //return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001)); + @Ensures("result <= 0.0") + public static double qualToErrorProbLog10(final double qual) { + return qual / -10.0; + } + + // ---------------------------------------------------------------------- + // + // Functions to convert a probability to a phred-scaled quality score + // + // ---------------------------------------------------------------------- + + /** + * Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20). + * + * Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE + * and by 1 at the low-end. + * + * @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong) + * @return a quality score (0-MAX_SAM_QUAL_SCORE) + */ + @Requires("errorRate >= 0.0 && errorRate <= 1.0") + public static byte errorProbToQual(final double errorRate) { + return errorProbToQual(errorRate, MAX_SAM_QUAL_SCORE); } /** - * Convert a probability to a quality score. Note, this is capped at a quality score which is determined by _eps_. + * Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20). * - * @param prob a probability (0.0-1.0) - * @param eps min probabilty allowed (0.0-1.0) - * @return a quality score (0-255) + * Note, this function caps the resulting quality score by the public static value MIN_REASONABLE_ERROR + * and by 1 at the low-end. + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong) + * @return a quality score (0-maxQual) */ - static public byte probToQual(double prob, double eps) { - double lp = Math.round(-10.0*Math.log10(1.0 - prob + eps)); - //System.out.printf("LP is %f, byte is %d%n", lp, b); - return boundQual((int)lp); - } - - static public double phredScaleCorrectRate(double trueRate) { - return phredScaleErrorRate(1-trueRate); - } - - static public double phredScaleErrorRate(double errorRate) { - return Math.abs(-10.0*Math.log10(errorRate)); + @Requires("errorRate >= 0.0 && errorRate <= 1.0") + public static byte errorProbToQual(final double errorRate, final byte maxQual) { + final double d = Math.round(-10.0*Math.log10(errorRate)); + return boundQual((int)d, maxQual); } /** - * Return a quality score, capped at max qual. - * - * @param qual the uncapped quality score - * @return the capped quality score + * @see #errorProbToQual(double, byte) with proper conversion of maxQual integer to a byte */ - static public byte boundQual(int qual) { - return boundQual(qual, MAX_QUAL_SCORE); + @Requires("maxQual >= 0 && maxQual < 255") + public static byte errorProbToQual(final double prob, final int maxQual) { + return errorProbToQual(prob, (byte)(maxQual & 0xFF)); } /** - * Returns an integer quality score bounded by 1 - maxQual. + * Convert a probability of being right to a phred-scaled quality score (0.99 => 20). * - * @param qual the quality score - * @param maxQual the maximum quality - * @return the integer betwen 1 and maxqual. + * Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE + * and by 1 at the low-end. + * + * @param prob a probability (0.0-1.0) of being right + * @return a quality score (0-MAX_SAM_QUAL_SCORE) */ - static public byte boundQual(int qual, byte maxQual) { - return (byte) Math.max(Math.min(qual, maxQual), 1); + @Requires("prob >= 0.0 && prob <= 1.0") + public static byte trueProbToQual(final double prob) { + return trueProbToQual(prob, MAX_SAM_QUAL_SCORE); + } + + /** + * Convert a probability of being right to a phred-scaled quality score (0.99 => 20). + * + * Note, this function caps the resulting quality score by the min probability allowed (EPS). + * So for example, if prob is 1e-6, which would imply a Q-score of 60, and EPS is 1e-4, + * the result of this function is actually Q40. + * + * Note that the resulting quality score, regardless of EPS, is capped by MAX_SAM_QUAL_SCORE and + * bounded on the low-side by 1. + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param prob a probability (0.0-1.0) of being right + * @param maxQual the maximum quality score we are allowed to emit here, regardless of the error rate + * @return a phred-scaled quality score (0-maxQualScore) as a byte + */ + @Requires({ + "prob >= 0.0 && prob <= 1.0" + }) + @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)") + public static byte trueProbToQual(final double prob, final byte maxQual) { + final double lp = Math.round(-10.0*MathUtils.log10OneMinusX(prob)); + return boundQual((int)lp, maxQual); + } + + /** + * @see #trueProbToQual(double, byte) with proper conversion of maxQual to a byte + */ + @Requires("maxQual >= 0 && maxQual < 255") + public static byte trueProbToQual(final double prob, final int maxQual) { + return trueProbToQual(prob, (byte)(maxQual & 0xFF)); + } + + /** + * Convert a probability of being right to a phred-scaled quality score of being wrong as a double + * + * This is a very generic method, that simply computes a phred-scaled double quality + * score given an error rate. It has the same precision as a normal double operation + * + * @param trueRate the probability of being right (0.0-1.0) + * @return a phred-scaled version of the error rate implied by trueRate + */ + @Requires("MathUtils.goodProbability(trueRate)") + @Ensures("result >= 0.0") + public static double phredScaleCorrectRate(final double trueRate) { + return phredScaleLog10ErrorRate(MathUtils.log10OneMinusX(trueRate)); + } + + /** + * Convert a probability of being wrong to a phred-scaled quality score of being wrong as a double + * + * This is a very generic method, that simply computes a phred-scaled double quality + * score given an error rate. It has the same precision as a normal double operation + * + * @param errorRate the probability of being wrong (0.0-1.0) + * @return a phred-scaled version of the error rate + */ + @Requires("MathUtils.goodProbability(errorRate)") + @Ensures("result >= 0.0") + public static double phredScaleErrorRate(final double errorRate) { + return phredScaleLog10ErrorRate(Math.log10(errorRate)); + } + + /** + * Convert a log10 probability of being wrong to a phred-scaled quality score of being wrong as a double + * + * This is a very generic method, that simply computes a phred-scaled double quality + * score given an error rate. It has the same precision as a normal double operation + * + * @param errorRateLog10 the log10 probability of being wrong (0.0-1.0) + * @return a phred-scaled version of the error rate + */ + @Ensures("result >= 0.0") + public static double phredScaleLog10ErrorRate(final double errorRateLog10) { + return -10.0 * Math.max(errorRateLog10, RAW_MIN_PHRED_SCALED_QUAL); + } + + // ---------------------------------------------------------------------- + // + // Routines to bound a quality score to a reasonable range + // + // ---------------------------------------------------------------------- + + /** + * Return a quality score that bounds qual by MAX_SAM_QUAL_SCORE and 1 + * + * @param qual the uncapped quality score as an integer + * @return the bounded quality score + */ + @Requires("qual >= 0") + @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (MAX_SAM_QUAL_SCORE & 0xFF)") + public static byte boundQual(int qual) { + return boundQual(qual, MAX_SAM_QUAL_SCORE); + } + + /** + * Return a quality score that bounds qual by maxQual and 1 + * + * WARNING -- because this function takes a byte for maxQual, you must be careful in converting + * integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF)) + * + * @param qual the uncapped quality score as an integer + * @param maxQual the maximum quality score, must be less < 255 + * @return the bounded quality score + */ + @Requires({"qual >= 0"}) + @Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)") + public static byte boundQual(final int qual, final byte maxQual) { + return (byte) (Math.max(Math.min(qual, maxQual & 0xFF), 1) & 0xFF); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index c78294505..afd51eb26 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -94,8 +94,7 @@ public class DupUtils { Arrays.sort(probs); double normalizedP = Math.pow(10, bestProb) / sumProbs; - double eps = Math.pow(10, -maxQScore/10.0); - byte qual = QualityUtils.probToQual(normalizedP, eps); + byte qual = QualityUtils.trueProbToQual(normalizedP, maxQScore); // if ( false ) { // System.out.printf("Best base is %s %.8f%n", bestBase, bestProb); // System.out.printf("2nd base is %.8f%n", probs[1]); diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java index 8069ea29f..17d09c844 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java @@ -129,7 +129,7 @@ public class LIBSPerformance extends CommandLineProgram { // read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); // final byte[] quals = new byte[readLength]; // for ( int i = 0; i < readLength; i++ ) -// quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); +// quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); // read.setBaseQualities(quals); // read.setCigarString(cigar); // diff --git a/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java index 997c8750c..1efce3cb0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/QualityUtilsUnitTest.java @@ -34,6 +34,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.*; @@ -42,10 +43,64 @@ import java.util.*; * Basic unit test for QualityUtils class */ public class QualityUtilsUnitTest extends BaseTest { + final private static double TOLERANCE = 1e-9; + @BeforeClass public void init() { } + @DataProvider(name = "QualTest") + public Object[][] makeMyDataProvider() { + List tests = new ArrayList(); + + for ( int qual = 0; qual < 255; qual++ ) { + tests.add(new Object[]{(byte)(qual & 0xFF), Math.pow(10.0, ((double)qual)/-10.0)}); + } + + return tests.toArray(new Object[][]{}); + } + + /** + * Example testng test using MyDataProvider + */ + @Test(dataProvider = "QualTest") + public void testMyData(final byte qual, final double errorRate) { + final double trueRate = 1 - errorRate; + + final double actualErrorRate = QualityUtils.qualToErrorProb(qual); + Assert.assertEquals(actualErrorRate, errorRate, TOLERANCE); + final double actualTrueRate = QualityUtils.qualToProb(qual); + Assert.assertEquals(actualTrueRate, trueRate, TOLERANCE); + + // log10 tests + final double actualLog10ErrorRate = QualityUtils.qualToErrorProbLog10(qual); + Assert.assertEquals(actualLog10ErrorRate, Math.log10(errorRate), TOLERANCE); + final double actualLog10TrueRate = QualityUtils.qualToProbLog10(qual); + Assert.assertEquals(actualLog10TrueRate, Math.log10(trueRate), TOLERANCE); + + // test that we can convert our error rates to quals, accounting for boundaries + final int expectedQual = Math.max(Math.min(qual & 0xFF, QualityUtils.MAX_SAM_QUAL_SCORE), 1); + final byte actualQual = QualityUtils.trueProbToQual(trueRate); + Assert.assertEquals(actualQual, expectedQual & 0xFF); + final byte actualQualFromErrorRate = QualityUtils.errorProbToQual(errorRate); + Assert.assertEquals(actualQualFromErrorRate, expectedQual & 0xFF); + + for ( int maxQual = 10; maxQual < QualityUtils.MAX_SAM_QUAL_SCORE; maxQual++ ) { + final byte maxAsByte = (byte)(maxQual & 0xFF); + final byte expectedQual2 = (byte)(Math.max(Math.min(qual & 0xFF, maxQual), 1) & 0xFF); + final byte actualQual2 = QualityUtils.trueProbToQual(trueRate, maxAsByte); + Assert.assertEquals(actualQual2, expectedQual2, "Failed with max " + maxQual); + final byte actualQualFromErrorRate2 = QualityUtils.errorProbToQual(errorRate, maxAsByte); + Assert.assertEquals(actualQualFromErrorRate2, expectedQual2, "Failed with max " + maxQual); + + // test the integer routines + final byte actualQualInt2 = QualityUtils.trueProbToQual(trueRate, maxQual); + Assert.assertEquals(actualQualInt2, expectedQual2, "Failed with max " + maxQual); + final byte actualQualFromErrorRateInt2 = QualityUtils.errorProbToQual(errorRate, maxQual); + Assert.assertEquals(actualQualFromErrorRateInt2, expectedQual2, "Failed with max " + maxQual); + } + } + @Test public void testQualCaches() { Assert.assertEquals(QualityUtils.qualToErrorProb((byte) 20), 0.01, 1e-6); @@ -63,4 +118,59 @@ public class QualityUtilsUnitTest extends BaseTest { Assert.assertEquals(QualityUtils.qualToProb((byte) 40), 0.9999, 1e-6); Assert.assertEquals(QualityUtils.qualToProbLog10((byte) 40), -4.34316198e-5, 1e-6); } + + @Test() + public void testBoundingDefault() { + for ( int qual = 0; qual < 1000; qual++ ) { + final byte expected = (byte)Math.max(Math.min(qual, QualityUtils.MAX_SAM_QUAL_SCORE), 1); + Assert.assertEquals(QualityUtils.boundQual(qual), expected); + } + } + + @Test() + public void testBoundingWithMax() { + for ( int max = 10; max < 255; max += 50 ) { + for ( int qual = 0; qual < 1000; qual++ ) { + final int expected = Math.max(Math.min(qual, max), 1); + Assert.assertEquals(QualityUtils.boundQual(qual, (byte)(max & 0xFF)) & 0xFF, expected & 0xFF, "qual " + qual + " max " + max); + } + } + } + + @DataProvider(name = "PhredScaleDoubleOps") + public Object[][] makePhredDoubleTest() { + List tests = new ArrayList(); + + tests.add(new Object[]{0.0, -10 * Math.log10(Double.MIN_VALUE)}); + tests.add(new Object[]{1.0, 0.0}); + for ( int pow = 1; pow < 20; pow++ ) { + tests.add(new Object[]{Math.pow(10.0, -1.0 * pow), pow * 10}); + tests.add(new Object[]{Math.pow(10.0, -1.5 * pow), pow * 15}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test() + public void testQualToErrorProbDouble() { + for ( double qual = 3.0; qual < 255.0; qual += 0.1 ) { + final double expected = Math.pow(10.0, qual / -10.0); + Assert.assertEquals(QualityUtils.qualToErrorProb(qual), expected, TOLERANCE, "failed qual->error prob for double qual " + qual); + } + } + + + @Test(dataProvider = "PhredScaleDoubleOps") + public void testPhredScaleDoubleOps(final double errorRate, final double expectedPhredScaled) { + final double actualError = QualityUtils.phredScaleErrorRate(errorRate); + Assert.assertEquals(actualError, expectedPhredScaled, TOLERANCE); + final double trueRate = 1 - errorRate; + final double actualTrue = QualityUtils.phredScaleCorrectRate(trueRate); + if ( trueRate == 1.0 ) { + Assert.assertEquals(actualTrue, QualityUtils.MIN_PHRED_SCALED_QUAL); + } else { + final double tol = errorRate < 1e-10 ? 10.0 : 1e-3; + Assert.assertEquals(actualTrue, expectedPhredScaled, tol); + } + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java index e52cd46cc..9c3472752 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator; import com.google.caliper.Param; import com.google.caliper.SimpleBenchmark; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; @@ -63,7 +62,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark { read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) - quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); + quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigar); reads.add(read); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index 1a51440ad..ee65109ca 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -141,7 +141,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest { read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) - quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); + quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigarString); return read; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index eb7e61ed8..fd87c1c12 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -315,7 +315,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { read.setReadBases(("TT" + eventBases + "A").getBytes()); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) - quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE); + quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigar);