Cleanup and unit tests for QualityUtils
-- Fixed a few conversion bugs with edge case quals (ones that were very high) -- Fixed a critical bug in the conversion of quals that was causing near capped quals to fall below their actual value. Will undoubtedly need to fix md5s -- More precise prob -> qual calculations for very high confidence events in phredScaleCorrectRate, trueProbToQual, and errorProbToQual. Very likely to improve accuracy of many calculations in the GATK -- Added errorProbToQual and trueProbToQual calculations that accept an integer cap, and perform the (tricky) conversion from int to byte correctly. -- Full docs and unit tests for phredScaleCorrectRate and phredScaleErrorRate. -- Renamed probToQual to trueProbToQual -- Added goodProbability and log10OneMinusX to MathUtils -- Went through the GATK and cleaned up many uses of QualityUtils -- Cleanup constants in QualityUtils -- Added full docs for all of the constants -- Rename MAX_QUAL_SCORE to MAX_SAM_QUAL_SCORE for clarity -- Moved MAX_GATK_USABLE_Q_SCORE to RecalDatum, as it's s BQSR specific feature -- Convert uses of QualityUtils.errorProbToQual(1-x) to QualityUtils.trueProbToQual(x) -- Cleanup duplicate quality score routines in MathUtils. Moved and renamed MathUtils.log10ProbabilityToPhredScale => QualityUtils.phredScaleLog10ErrorRate. Removed 3 routines from MathUtils, and remapped their usages into the better routines in QualityUtils
This commit is contained in:
parent
307f709cc7
commit
9e28d1e347
|
|
@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo {
|
|||
}
|
||||
|
||||
private boolean validQual(final byte result) {
|
||||
return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
|
||||
return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -267,7 +267,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
|
||||
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_SAM_QUAL_SCORE +1][BaseUtils.BASES.length+1][QualityUtils.MAX_SAM_QUAL_SCORE +1][MAX_PLOIDY];
|
||||
|
||||
protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
|
||||
return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -123,7 +124,7 @@ public class ErrorModel {
|
|||
}
|
||||
}
|
||||
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
double p = QualityUtils.qualToErrorProbLog10((byte)(maxQualityScore-minQualityScore));
|
||||
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
// maximum uncertainty if there's no ref data at site
|
||||
|
|
@ -270,7 +271,7 @@ public class ErrorModel {
|
|||
})
|
||||
private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) {
|
||||
// same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows
|
||||
double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage;
|
||||
double lambda = QualityUtils.qualToErrorProb(q) * (double )coverage;
|
||||
// log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k)
|
||||
return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -613,7 +613,7 @@ public class UnifiedGenotyperEngine {
|
|||
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||
}
|
||||
|
||||
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
return new VariantCallContext(vc, QualityUtils.phredScaleCorrectRate(P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
}
|
||||
|
||||
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
|
|
@ -395,7 +396,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
int phredScoreTransmission = -1;
|
||||
if(transmissionProb != NO_TRANSMISSION_PROB){
|
||||
double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
|
||||
double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
|
||||
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
|
||||
}
|
||||
//Handle null, missing and unavailable genotypes
|
||||
|
|
|
|||
|
|
@ -1017,7 +1017,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
// Determine the phase at this position:
|
||||
this.maxEntry = hapTable.maxEntry();
|
||||
|
||||
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
|
||||
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.trueProbToQual(posteriorProb):
|
||||
PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
|
||||
for (PhasingTable.PhasingTableEntry pte : hapTable) {
|
||||
if (pte != maxEntry)
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ import net.sf.samtools.SAMUtils;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -174,7 +173,7 @@ public class BaseRecalibration {
|
|||
double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );
|
||||
|
||||
// recalibrated quality is bound between 1 and MAX_QUAL
|
||||
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), RecalDatum.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
// return the quantized version of the recalibrated quality
|
||||
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ public class QualQuantizer {
|
|||
"nObservations >= 0",
|
||||
"nErrors >= 0",
|
||||
"nErrors <= nObservations",
|
||||
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE",
|
||||
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE",
|
||||
"mergeOrder >= 0"})
|
||||
protected final class QualInterval implements Comparable<QualInterval> {
|
||||
final int qStart, qEnd, fixedQual, level;
|
||||
|
|
@ -224,10 +224,10 @@ public class QualQuantizer {
|
|||
/**
|
||||
* @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
|
||||
*/
|
||||
@Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
|
||||
@Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE")
|
||||
public byte getQual() {
|
||||
if ( ! hasFixedQual() )
|
||||
return QualityUtils.probToQual(1-getErrorRate(), 0);
|
||||
return QualityUtils.errorProbToQual(getErrorRate());
|
||||
else
|
||||
return (byte)fixedQual;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ public class QuantizationInfo {
|
|||
}
|
||||
|
||||
public void noQuantization() {
|
||||
this.quantizationLevels = QualityUtils.MAX_QUAL_SCORE;
|
||||
this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
for (int i = 0; i < this.quantizationLevels; i++)
|
||||
quantizedQuals.set(i, (byte) i);
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ public class QuantizationInfo {
|
|||
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) {
|
||||
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||
|
|
|
|||
|
|
@ -74,10 +74,10 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.commons.math.optimization.fitting.GaussianFunction;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -100,6 +100,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
"numMismatches <= numObservations"
|
||||
})
|
||||
public class RecalDatum {
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
/**
|
||||
|
|
@ -337,7 +338,7 @@ public class RecalDatum {
|
|||
// This is the old and busted point estimate approach:
|
||||
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
//static final boolean DEBUG = false;
|
||||
|
|
@ -369,7 +370,12 @@ public class RecalDatum {
|
|||
return Qemp;
|
||||
}
|
||||
|
||||
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
/**
|
||||
* Quals above this value should be capped down to this value (because they are too high)
|
||||
* in the base quality score recalibrator
|
||||
*/
|
||||
public final static byte MAX_GATK_USABLE_Q_SCORE = 40;
|
||||
static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
static {
|
||||
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
|
||||
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
|
||||
|
|
@ -379,7 +385,7 @@ public class RecalDatum {
|
|||
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
|
||||
|
||||
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
|
||||
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
double log10Prior = Math.log10(gaussian.value((double) i));
|
||||
if ( Double.isInfinite(log10Prior) )
|
||||
log10Prior = -Double.MAX_VALUE;
|
||||
|
|
@ -388,7 +394,7 @@ public class RecalDatum {
|
|||
}
|
||||
|
||||
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE);
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
|
||||
return log10QempPriorCache[difference];
|
||||
|
|
|
|||
|
|
@ -263,11 +263,11 @@ public class RecalibrationReport {
|
|||
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
||||
*
|
||||
* @param table the GATKReportTable containing the quantization mappings
|
||||
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
||||
* @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE
|
||||
*/
|
||||
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
|
|
|||
|
|
@ -119,6 +119,6 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return QualityUtils.MAX_QUAL_SCORE;
|
||||
return QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
@ -90,7 +90,7 @@ public class QualQuantizerUnitTest extends BaseTest {
|
|||
this.exError = exError;
|
||||
this.exTotal = exTotal;
|
||||
this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1));
|
||||
this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0);
|
||||
this.exQual = QualityUtils.errorProbToQual(this.exErrorRate);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
|
@ -58,7 +57,6 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
|
|
@ -207,8 +205,8 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testlog10QempPrior() {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
Assert.assertTrue(log10prior < 0.0);
|
||||
Assert.assertFalse(Double.isInfinite(log10prior));
|
||||
|
|
@ -219,7 +217,7 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
final int Qrep = 20;
|
||||
int maxQemp = -1;
|
||||
double maxQempValue = -Double.MAX_VALUE;
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
if ( log10prior > maxQempValue ) {
|
||||
maxQemp = Qemp;
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public class RecalibrationReportUnitTest {
|
|||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
|
|
@ -75,10 +75,10 @@ public class RecalibrationReportUnitTest {
|
|||
public void testOutput() {
|
||||
final int length = 100;
|
||||
|
||||
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
|
||||
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
|
||||
|
||||
for (int i = 0; i<= QualityUtils.MAX_QUAL_SCORE; i++) {
|
||||
for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
|
||||
quals.add((byte) i);
|
||||
counts.add(1L);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
|||
final int mismatches = (Integer)table.get(key, "mismatches");
|
||||
final int count = (Integer)table.get(key, "counts");
|
||||
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
|
||||
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
|
||||
final int qual = QualityUtils.errorProbToQual(errorRate);
|
||||
table.set(key, "qual", qual);
|
||||
table.set(key, "errorrate", errorRate);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -408,7 +409,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
return genotype.getGQ() >= minGenotypeQuality;
|
||||
} else if ( genotype.hasLikelihoods() ) {
|
||||
double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector());
|
||||
return MathUtils.log10ProbabilityToPhredScale(log10gq) >= minGenotypeQuality;
|
||||
return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= minGenotypeQuality;
|
||||
}
|
||||
|
||||
return minGenotypeQuality <= 0;
|
||||
|
|
|
|||
|
|
@ -1251,6 +1251,16 @@ public class MathUtils {
|
|||
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the result is a well-formed probability
|
||||
*
|
||||
* @param result a supposedly well-formed probability value
|
||||
* @return true if result is really well formed
|
||||
*/
|
||||
public static boolean goodProbability(final double result) {
|
||||
return result >= 0.0 && result <= 1.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that computes on the fly average and standard deviation for a stream of numbers.
|
||||
* The number of observations does not have to be known in advance, and can be also very big (so that
|
||||
|
|
@ -1343,28 +1353,6 @@ public class MathUtils {
|
|||
return Math.max(a, x2);
|
||||
}
|
||||
|
||||
public static double phredScaleToProbability(byte q) {
|
||||
return Math.pow(10, (-q) / 10.0);
|
||||
}
|
||||
|
||||
public static double phredScaleToLog10Probability(byte q) {
|
||||
return ((-q) / 10.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the phred scaled value of probability p
|
||||
*
|
||||
* @param p probability (between 0 and 1).
|
||||
* @return phred scaled probability of p
|
||||
*/
|
||||
public static byte probabilityToPhredScale(double p) {
|
||||
return (byte) ((-10) * Math.log10(p));
|
||||
}
|
||||
|
||||
public static double log10ProbabilityToPhredScale(double log10p) {
|
||||
return (-10) * log10p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts LN to LOG10
|
||||
*
|
||||
|
|
@ -1774,4 +1762,24 @@ public class MathUtils {
|
|||
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute in a numerical correct way the quanity log10(1-x)
|
||||
*
|
||||
* Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow
|
||||
* in 1-x when x is very small
|
||||
*
|
||||
* @param x a positive double value between 0.0 and 1.0
|
||||
* @return an estimate of log10(1-x)
|
||||
*/
|
||||
@Requires("x >= 0.0 && x <= 1.0")
|
||||
@Ensures("result <= 0.0")
|
||||
public static double log10OneMinusX(final double x) {
|
||||
if ( x == 1.0 )
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
else if ( x == 0.0 )
|
||||
return 0.0;
|
||||
else
|
||||
return Math.log10(1 / x - 1) + Math.log10(x);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,38 +25,50 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
|
||||
/**
|
||||
* QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating
|
||||
* quality scores.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
* @author Kiran Garimella, Mark DePristo
|
||||
* @since Way back
|
||||
*/
|
||||
public class QualityUtils {
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||
/**
|
||||
* Maximum quality score that can be encoded in a SAM/BAM file
|
||||
*/
|
||||
public final static byte MAX_SAM_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
|
||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||
public final static byte MAX_REASONABLE_Q_SCORE = 60; // bams containing quals above this value are extremely suspicious and we should warn the user
|
||||
public final static byte MAX_GATK_USABLE_Q_SCORE = 40; // quals above this value should be capped down to this value (because they are too high)
|
||||
|
||||
private final static double RAW_MIN_PHRED_SCALED_QUAL = Math.log10(Double.MIN_VALUE);
|
||||
protected final static double MIN_PHRED_SCALED_QUAL = -10.0 * RAW_MIN_PHRED_SCALED_QUAL;
|
||||
|
||||
/**
|
||||
* bams containing quals above this value are extremely suspicious and we should warn the user
|
||||
*/
|
||||
public final static byte MAX_REASONABLE_Q_SCORE = 60;
|
||||
|
||||
/**
|
||||
* The lowest quality score for a base that is considered reasonable for statistical analysis. This is
|
||||
* because Q 6 => you stand a 25% of being right, which means all bases are equally likely
|
||||
*/
|
||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
|
||||
|
||||
/**
|
||||
* Cached values for qual as byte calculations so they are very fast
|
||||
*/
|
||||
private static double qualToErrorProbCache[] = new double[256];
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
|
||||
}
|
||||
|
||||
private static double qualToErrorProbLog10Cache[] = new double[256];
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbLog10Cache[i] = qualToErrorProbLog10Raw(i);
|
||||
}
|
||||
|
||||
private static double qualToProbLog10Cache[] = new double[256];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToProbLog10Cache[i] = qualToProbLog10Raw(i);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
qualToErrorProbCache[i] = qualToErrorProb((double) i);
|
||||
qualToProbLog10Cache[i] = Math.log10(1.0 - qualToErrorProbCache[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -64,111 +76,301 @@ public class QualityUtils {
|
|||
*/
|
||||
private QualityUtils() {}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// These are all functions to convert a phred-scaled quality score to a probability
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convert a quality score to a probability. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
* Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
|
||||
*
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* Because the input is a discretized byte value, this function uses a cache so is very efficient
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param qual a quality score (0-255)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public double qualToProb(byte qual) {
|
||||
@Ensures("result >= 0.0 && result <= 1.0")
|
||||
public static double qualToProb(final byte qual) {
|
||||
return 1.0 - qualToErrorProb(qual);
|
||||
}
|
||||
|
||||
static public double qualToProb(double qual) {
|
||||
return 1.0 - Math.pow(10.0, qual/(-10.0));
|
||||
/**
|
||||
* Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
|
||||
*
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* Because the input is a double value, this function must call Math.pow so can be quite expensive
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
@Requires("qual >= 0.0")
|
||||
@Ensures("result >= 0.0 && result <= 1.0")
|
||||
public static double qualToProb(final double qual) {
|
||||
return 1.0 - qualToErrorProb(qual);
|
||||
}
|
||||
|
||||
static private double qualToProbLog10Raw(int qual) {
|
||||
return Math.log10(1.0 - qualToErrorProbRaw(qual));
|
||||
}
|
||||
|
||||
static public double qualToProbLog10(byte qual) {
|
||||
/**
|
||||
* Convert a phred-scaled quality score to its log10 probability of being true (Q30 => log10(0.999))
|
||||
*
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* Because the input is a double value, this function must call Math.pow so can be quite expensive
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
@Ensures("result <= 0.0")
|
||||
public static double qualToProbLog10(final byte qual) {
|
||||
return qualToProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a quality score to a probability of error. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
* Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
|
||||
*
|
||||
* @param qual a quality score (0 - 255)
|
||||
* @return a probability (0.0 - 1.0)
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* Because the input is a double value, this function must call Math.pow so can be quite expensive
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static private double qualToErrorProbRaw(int qual) {
|
||||
return qualToErrorProb((double) qual);
|
||||
}
|
||||
|
||||
@Requires("qual >= 0.0")
|
||||
@Ensures("result >= 0.0 && result <= 1.0")
|
||||
public static double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual/-10.0);
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
}
|
||||
|
||||
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
/**
|
||||
* Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
|
||||
*
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* Because the input is a byte value, this function uses a cache so is very efficient
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a byte
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
@Ensures("result >= 0.0 && result <= 1.0")
|
||||
public static double qualToErrorProb(final byte qual) {
|
||||
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
static private double qualToErrorProbLog10Raw(int qual) {
|
||||
return ((double) qual)/-10.0;
|
||||
}
|
||||
|
||||
static public double qualToErrorProbLog10(byte qual) {
|
||||
return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
static public double qualToErrorProbLog10(final double qual) {
|
||||
return qual/-10.0;
|
||||
/**
|
||||
* Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
|
||||
*
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* The calculation is extremely efficient
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a byte
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
@Ensures("result <= 0.0")
|
||||
public static double qualToErrorProbLog10(final byte qual) {
|
||||
return qualToErrorProbLog10((double)(qual & 0xFF));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||
* Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
|
||||
*
|
||||
* @param prob a probability (0.0-1.0)
|
||||
* @return a quality score (0-40)
|
||||
* This is the Phred-style conversion, *not* the Illumina-style conversion.
|
||||
*
|
||||
* The calculation is extremely efficient
|
||||
*
|
||||
* @param qual a phred-scaled quality score encoded as a double
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public byte probToQual(double prob) {
|
||||
return probToQual(prob, MIN_REASONABLE_ERROR);
|
||||
//return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001));
|
||||
@Ensures("result <= 0.0")
|
||||
public static double qualToErrorProbLog10(final double qual) {
|
||||
return qual / -10.0;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Functions to convert a probability to a phred-scaled quality score
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
|
||||
*
|
||||
* Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
|
||||
* and by 1 at the low-end.
|
||||
*
|
||||
* @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
|
||||
* @return a quality score (0-MAX_SAM_QUAL_SCORE)
|
||||
*/
|
||||
@Requires("errorRate >= 0.0 && errorRate <= 1.0")
|
||||
public static byte errorProbToQual(final double errorRate) {
|
||||
return errorProbToQual(errorRate, MAX_SAM_QUAL_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at a quality score which is determined by _eps_.
|
||||
* Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
|
||||
*
|
||||
* @param prob a probability (0.0-1.0)
|
||||
* @param eps min probabilty allowed (0.0-1.0)
|
||||
* @return a quality score (0-255)
|
||||
* Note, this function caps the resulting quality score by the public static value MIN_REASONABLE_ERROR
|
||||
* and by 1 at the low-end.
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
|
||||
* @return a quality score (0-maxQual)
|
||||
*/
|
||||
static public byte probToQual(double prob, double eps) {
|
||||
double lp = Math.round(-10.0*Math.log10(1.0 - prob + eps));
|
||||
//System.out.printf("LP is %f, byte is %d%n", lp, b);
|
||||
return boundQual((int)lp);
|
||||
}
|
||||
|
||||
static public double phredScaleCorrectRate(double trueRate) {
|
||||
return phredScaleErrorRate(1-trueRate);
|
||||
}
|
||||
|
||||
static public double phredScaleErrorRate(double errorRate) {
|
||||
return Math.abs(-10.0*Math.log10(errorRate));
|
||||
@Requires("errorRate >= 0.0 && errorRate <= 1.0")
|
||||
public static byte errorProbToQual(final double errorRate, final byte maxQual) {
|
||||
final double d = Math.round(-10.0*Math.log10(errorRate));
|
||||
return boundQual((int)d, maxQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a quality score, capped at max qual.
|
||||
*
|
||||
* @param qual the uncapped quality score
|
||||
* @return the capped quality score
|
||||
* @see #errorProbToQual(double, byte) with proper conversion of maxQual integer to a byte
|
||||
*/
|
||||
static public byte boundQual(int qual) {
|
||||
return boundQual(qual, MAX_QUAL_SCORE);
|
||||
@Requires("maxQual >= 0 && maxQual < 255")
|
||||
public static byte errorProbToQual(final double prob, final int maxQual) {
|
||||
return errorProbToQual(prob, (byte)(maxQual & 0xFF));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an integer quality score bounded by 1 - maxQual.
|
||||
* Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
|
||||
*
|
||||
* @param qual the quality score
|
||||
* @param maxQual the maximum quality
|
||||
* @return the integer betwen 1 and maxqual.
|
||||
* Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
|
||||
* and by 1 at the low-end.
|
||||
*
|
||||
* @param prob a probability (0.0-1.0) of being right
|
||||
* @return a quality score (0-MAX_SAM_QUAL_SCORE)
|
||||
*/
|
||||
static public byte boundQual(int qual, byte maxQual) {
|
||||
return (byte) Math.max(Math.min(qual, maxQual), 1);
|
||||
@Requires("prob >= 0.0 && prob <= 1.0")
|
||||
public static byte trueProbToQual(final double prob) {
|
||||
return trueProbToQual(prob, MAX_SAM_QUAL_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
|
||||
*
|
||||
* Note, this function caps the resulting quality score by the min probability allowed (EPS).
|
||||
* So for example, if prob is 1e-6, which would imply a Q-score of 60, and EPS is 1e-4,
|
||||
* the result of this function is actually Q40.
|
||||
*
|
||||
* Note that the resulting quality score, regardless of EPS, is capped by MAX_SAM_QUAL_SCORE and
|
||||
* bounded on the low-side by 1.
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param prob a probability (0.0-1.0) of being right
|
||||
* @param maxQual the maximum quality score we are allowed to emit here, regardless of the error rate
|
||||
* @return a phred-scaled quality score (0-maxQualScore) as a byte
|
||||
*/
|
||||
@Requires({
|
||||
"prob >= 0.0 && prob <= 1.0"
|
||||
})
|
||||
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
|
||||
public static byte trueProbToQual(final double prob, final byte maxQual) {
|
||||
final double lp = Math.round(-10.0*MathUtils.log10OneMinusX(prob));
|
||||
return boundQual((int)lp, maxQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #trueProbToQual(double, byte) with proper conversion of maxQual to a byte
|
||||
*/
|
||||
@Requires("maxQual >= 0 && maxQual < 255")
|
||||
public static byte trueProbToQual(final double prob, final int maxQual) {
|
||||
return trueProbToQual(prob, (byte)(maxQual & 0xFF));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability of being right to a phred-scaled quality score of being wrong as a double
|
||||
*
|
||||
* This is a very generic method, that simply computes a phred-scaled double quality
|
||||
* score given an error rate. It has the same precision as a normal double operation
|
||||
*
|
||||
* @param trueRate the probability of being right (0.0-1.0)
|
||||
* @return a phred-scaled version of the error rate implied by trueRate
|
||||
*/
|
||||
@Requires("MathUtils.goodProbability(trueRate)")
|
||||
@Ensures("result >= 0.0")
|
||||
public static double phredScaleCorrectRate(final double trueRate) {
|
||||
return phredScaleLog10ErrorRate(MathUtils.log10OneMinusX(trueRate));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability of being wrong to a phred-scaled quality score of being wrong as a double
|
||||
*
|
||||
* This is a very generic method, that simply computes a phred-scaled double quality
|
||||
* score given an error rate. It has the same precision as a normal double operation
|
||||
*
|
||||
* @param errorRate the probability of being wrong (0.0-1.0)
|
||||
* @return a phred-scaled version of the error rate
|
||||
*/
|
||||
@Requires("MathUtils.goodProbability(errorRate)")
|
||||
@Ensures("result >= 0.0")
|
||||
public static double phredScaleErrorRate(final double errorRate) {
|
||||
return phredScaleLog10ErrorRate(Math.log10(errorRate));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a log10 probability of being wrong to a phred-scaled quality score of being wrong as a double
|
||||
*
|
||||
* This is a very generic method, that simply computes a phred-scaled double quality
|
||||
* score given an error rate. It has the same precision as a normal double operation
|
||||
*
|
||||
* @param errorRateLog10 the log10 probability of being wrong (0.0-1.0)
|
||||
* @return a phred-scaled version of the error rate
|
||||
*/
|
||||
@Ensures("result >= 0.0")
|
||||
public static double phredScaleLog10ErrorRate(final double errorRateLog10) {
|
||||
return -10.0 * Math.max(errorRateLog10, RAW_MIN_PHRED_SCALED_QUAL);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Routines to bound a quality score to a reasonable range
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return a quality score that bounds qual by MAX_SAM_QUAL_SCORE and 1
|
||||
*
|
||||
* @param qual the uncapped quality score as an integer
|
||||
* @return the bounded quality score
|
||||
*/
|
||||
@Requires("qual >= 0")
|
||||
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (MAX_SAM_QUAL_SCORE & 0xFF)")
|
||||
public static byte boundQual(int qual) {
|
||||
return boundQual(qual, MAX_SAM_QUAL_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a quality score that bounds qual by maxQual and 1
|
||||
*
|
||||
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
|
||||
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
|
||||
*
|
||||
* @param qual the uncapped quality score as an integer
|
||||
* @param maxQual the maximum quality score, must be less < 255
|
||||
* @return the bounded quality score
|
||||
*/
|
||||
@Requires({"qual >= 0"})
|
||||
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
|
||||
public static byte boundQual(final int qual, final byte maxQual) {
|
||||
return (byte) (Math.max(Math.min(qual, maxQual & 0xFF), 1) & 0xFF);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,8 +94,7 @@ public class DupUtils {
|
|||
|
||||
Arrays.sort(probs);
|
||||
double normalizedP = Math.pow(10, bestProb) / sumProbs;
|
||||
double eps = Math.pow(10, -maxQScore/10.0);
|
||||
byte qual = QualityUtils.probToQual(normalizedP, eps);
|
||||
byte qual = QualityUtils.trueProbToQual(normalizedP, maxQScore);
|
||||
// if ( false ) {
|
||||
// System.out.printf("Best base is %s %.8f%n", bestBase, bestProb);
|
||||
// System.out.printf("2nd base is %.8f%n", probs[1]);
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class LIBSPerformance extends CommandLineProgram {
|
|||
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
// final byte[] quals = new byte[readLength];
|
||||
// for ( int i = 0; i < readLength; i++ )
|
||||
// quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
// quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
// read.setBaseQualities(quals);
|
||||
// read.setCigarString(cigar);
|
||||
//
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ package org.broadinstitute.sting.utils;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -42,10 +43,64 @@ import java.util.*;
|
|||
* Basic unit test for QualityUtils class
|
||||
*/
|
||||
public class QualityUtilsUnitTest extends BaseTest {
|
||||
final private static double TOLERANCE = 1e-9;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
}
|
||||
|
||||
@DataProvider(name = "QualTest")
|
||||
public Object[][] makeMyDataProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( int qual = 0; qual < 255; qual++ ) {
|
||||
tests.add(new Object[]{(byte)(qual & 0xFF), Math.pow(10.0, ((double)qual)/-10.0)});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
/**
|
||||
* Example testng test using MyDataProvider
|
||||
*/
|
||||
@Test(dataProvider = "QualTest")
|
||||
public void testMyData(final byte qual, final double errorRate) {
|
||||
final double trueRate = 1 - errorRate;
|
||||
|
||||
final double actualErrorRate = QualityUtils.qualToErrorProb(qual);
|
||||
Assert.assertEquals(actualErrorRate, errorRate, TOLERANCE);
|
||||
final double actualTrueRate = QualityUtils.qualToProb(qual);
|
||||
Assert.assertEquals(actualTrueRate, trueRate, TOLERANCE);
|
||||
|
||||
// log10 tests
|
||||
final double actualLog10ErrorRate = QualityUtils.qualToErrorProbLog10(qual);
|
||||
Assert.assertEquals(actualLog10ErrorRate, Math.log10(errorRate), TOLERANCE);
|
||||
final double actualLog10TrueRate = QualityUtils.qualToProbLog10(qual);
|
||||
Assert.assertEquals(actualLog10TrueRate, Math.log10(trueRate), TOLERANCE);
|
||||
|
||||
// test that we can convert our error rates to quals, accounting for boundaries
|
||||
final int expectedQual = Math.max(Math.min(qual & 0xFF, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
|
||||
final byte actualQual = QualityUtils.trueProbToQual(trueRate);
|
||||
Assert.assertEquals(actualQual, expectedQual & 0xFF);
|
||||
final byte actualQualFromErrorRate = QualityUtils.errorProbToQual(errorRate);
|
||||
Assert.assertEquals(actualQualFromErrorRate, expectedQual & 0xFF);
|
||||
|
||||
for ( int maxQual = 10; maxQual < QualityUtils.MAX_SAM_QUAL_SCORE; maxQual++ ) {
|
||||
final byte maxAsByte = (byte)(maxQual & 0xFF);
|
||||
final byte expectedQual2 = (byte)(Math.max(Math.min(qual & 0xFF, maxQual), 1) & 0xFF);
|
||||
final byte actualQual2 = QualityUtils.trueProbToQual(trueRate, maxAsByte);
|
||||
Assert.assertEquals(actualQual2, expectedQual2, "Failed with max " + maxQual);
|
||||
final byte actualQualFromErrorRate2 = QualityUtils.errorProbToQual(errorRate, maxAsByte);
|
||||
Assert.assertEquals(actualQualFromErrorRate2, expectedQual2, "Failed with max " + maxQual);
|
||||
|
||||
// test the integer routines
|
||||
final byte actualQualInt2 = QualityUtils.trueProbToQual(trueRate, maxQual);
|
||||
Assert.assertEquals(actualQualInt2, expectedQual2, "Failed with max " + maxQual);
|
||||
final byte actualQualFromErrorRateInt2 = QualityUtils.errorProbToQual(errorRate, maxQual);
|
||||
Assert.assertEquals(actualQualFromErrorRateInt2, expectedQual2, "Failed with max " + maxQual);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQualCaches() {
|
||||
Assert.assertEquals(QualityUtils.qualToErrorProb((byte) 20), 0.01, 1e-6);
|
||||
|
|
@ -63,4 +118,59 @@ public class QualityUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(QualityUtils.qualToProb((byte) 40), 0.9999, 1e-6);
|
||||
Assert.assertEquals(QualityUtils.qualToProbLog10((byte) 40), -4.34316198e-5, 1e-6);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testBoundingDefault() {
|
||||
for ( int qual = 0; qual < 1000; qual++ ) {
|
||||
final byte expected = (byte)Math.max(Math.min(qual, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
|
||||
Assert.assertEquals(QualityUtils.boundQual(qual), expected);
|
||||
}
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testBoundingWithMax() {
|
||||
for ( int max = 10; max < 255; max += 50 ) {
|
||||
for ( int qual = 0; qual < 1000; qual++ ) {
|
||||
final int expected = Math.max(Math.min(qual, max), 1);
|
||||
Assert.assertEquals(QualityUtils.boundQual(qual, (byte)(max & 0xFF)) & 0xFF, expected & 0xFF, "qual " + qual + " max " + max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PhredScaleDoubleOps")
|
||||
public Object[][] makePhredDoubleTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{0.0, -10 * Math.log10(Double.MIN_VALUE)});
|
||||
tests.add(new Object[]{1.0, 0.0});
|
||||
for ( int pow = 1; pow < 20; pow++ ) {
|
||||
tests.add(new Object[]{Math.pow(10.0, -1.0 * pow), pow * 10});
|
||||
tests.add(new Object[]{Math.pow(10.0, -1.5 * pow), pow * 15});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testQualToErrorProbDouble() {
|
||||
for ( double qual = 3.0; qual < 255.0; qual += 0.1 ) {
|
||||
final double expected = Math.pow(10.0, qual / -10.0);
|
||||
Assert.assertEquals(QualityUtils.qualToErrorProb(qual), expected, TOLERANCE, "failed qual->error prob for double qual " + qual);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "PhredScaleDoubleOps")
|
||||
public void testPhredScaleDoubleOps(final double errorRate, final double expectedPhredScaled) {
|
||||
final double actualError = QualityUtils.phredScaleErrorRate(errorRate);
|
||||
Assert.assertEquals(actualError, expectedPhredScaled, TOLERANCE);
|
||||
final double trueRate = 1 - errorRate;
|
||||
final double actualTrue = QualityUtils.phredScaleCorrectRate(trueRate);
|
||||
if ( trueRate == 1.0 ) {
|
||||
Assert.assertEquals(actualTrue, QualityUtils.MIN_PHRED_SCALED_QUAL);
|
||||
} else {
|
||||
final double tol = errorRate < 1e-10 ? 10.0 : 1e-3;
|
||||
Assert.assertEquals(actualTrue, expectedPhredScaled, tol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
|
@ -63,7 +62,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
final byte[] quals = new byte[readLength];
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigar);
|
||||
reads.add(read);
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
final byte[] quals = new byte[readLength];
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigarString);
|
||||
return read;
|
||||
|
|
|
|||
|
|
@ -315,7 +315,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
read.setReadBases(("TT" + eventBases + "A").getBytes());
|
||||
final byte[] quals = new byte[readLength];
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigar);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue