Cleanup and unit tests for QualityUtils

-- Fixed a few conversion bugs with edge case quals (ones that were very high)
-- Fixed a critical bug in the conversion of quals that was causing near capped quals to fall below their actual value.  Will undoubtedly need to fix md5s
-- More precise prob -> qual calculations for very high confidence events in phredScaleCorrectRate, trueProbToQual, and errorProbToQual.  Very likely to improve accuracy of many calculations in the GATK
-- Added errorProbToQual and trueProbToQual calculations that accept an integer cap, and perform the (tricky) conversion from int to byte correctly.
-- Full docs and unit tests for phredScaleCorrectRate and phredScaleErrorRate.
-- Renamed probToQual to trueProbToQual
-- Added goodProbability and log10OneMinusX to MathUtils
-- Went through the GATK and cleaned up many uses of QualityUtils
-- Cleanup constants in QualityUtils
-- Added full docs for all of the constants
-- Rename MAX_QUAL_SCORE to MAX_SAM_QUAL_SCORE for clarity
-- Moved MAX_GATK_USABLE_Q_SCORE to RecalDatum, as it's s BQSR specific feature
-- Convert uses of QualityUtils.errorProbToQual(1-x) to QualityUtils.trueProbToQual(x)
-- Cleanup duplicate quality score routines in MathUtils.  Moved and renamed MathUtils.log10ProbabilityToPhredScale => QualityUtils.phredScaleLog10ErrorRate. Removed 3 routines from MathUtils, and remapped their usages into the better routines in QualityUtils
This commit is contained in:
Mark DePristo 2013-02-09 13:04:30 -05:00
parent 307f709cc7
commit 9e28d1e347
25 changed files with 470 additions and 146 deletions

View File

@ -179,6 +179,6 @@ public final class ReadRecalibrationInfo {
}
private boolean validQual(final byte result) {
return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
return result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE;
}
}

View File

@ -267,7 +267,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
//
// -------------------------------------------------------------------------------------
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][BaseUtils.BASES.length+1][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY];
static DiploidSNPGenotypeLikelihoods[][][][][] CACHE = new DiploidSNPGenotypeLikelihoods[BaseUtils.BASES.length][QualityUtils.MAX_SAM_QUAL_SCORE +1][BaseUtils.BASES.length+1][QualityUtils.MAX_SAM_QUAL_SCORE +1][MAX_PLOIDY];
protected boolean inCache(byte observedBase1, byte qualityScore1, byte observedBase2, byte qualityScore2, int ploidy) {
return getCache(CACHE, observedBase1, qualityScore1, observedBase2, qualityScore2, ploidy) != null;

View File

@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -123,7 +124,7 @@ public class ErrorModel {
}
}
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
double p = QualityUtils.qualToErrorProbLog10((byte)(maxQualityScore-minQualityScore));
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
// maximum uncertainty if there's no ref data at site
@ -270,7 +271,7 @@ public class ErrorModel {
})
private double log10PoissonProbabilitySiteGivenQual(byte q, int coverage, int mismatches) {
// same as log10ProbabilitySiteGivenQual but with Poisson approximation to avoid numerical underflows
double lambda = MathUtils.phredScaleToProbability(q) * (double )coverage;
double lambda = QualityUtils.qualToErrorProb(q) * (double )coverage;
// log10(e^-lambda*lambda^k/k!) = -lambda + k*log10(lambda) - log10factorial(k)
return Math.log10(lambda)*mismatches - lambda*log10MinusE- MathUtils.log10Factorial(mismatches);
}

View File

@ -613,7 +613,7 @@ public class UnifiedGenotyperEngine {
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
}
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
return new VariantCallContext(vc, QualityUtils.phredScaleCorrectRate(P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
}
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {

View File

@ -57,6 +57,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
@ -395,7 +396,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
int phredScoreTransmission = -1;
if(transmissionProb != NO_TRANSMISSION_PROB){
double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb)));
double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
}
//Handle null, missing and unavailable genotypes

View File

@ -1017,7 +1017,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
// Determine the phase at this position:
this.maxEntry = hapTable.maxEntry();
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.probToQual(posteriorProb):
// convert posteriorProb to PHRED scale, but do NOT cap the quality as in QualityUtils.trueProbToQual(posteriorProb):
PreciseNonNegativeDouble sumErrorProbs = new PreciseNonNegativeDouble(ZERO);
for (PhasingTable.PhasingTableEntry pte : hapTable) {
if (pte != maxEntry)

View File

@ -52,7 +52,6 @@ import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -174,7 +173,7 @@ public class BaseRecalibration {
double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );
// recalibrated quality is bound between 1 and MAX_QUAL
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), RecalDatum.MAX_RECALIBRATED_Q_SCORE);
// return the quantized version of the recalibrated quality
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);

View File

@ -162,7 +162,7 @@ public class QualQuantizer {
"nObservations >= 0",
"nErrors >= 0",
"nErrors <= nObservations",
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE",
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE",
"mergeOrder >= 0"})
protected final class QualInterval implements Comparable<QualInterval> {
final int qStart, qEnd, fixedQual, level;
@ -224,10 +224,10 @@ public class QualQuantizer {
/**
* @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
*/
@Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
@Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE")
public byte getQual() {
if ( ! hasFixedQual() )
return QualityUtils.probToQual(1-getErrorRate(), 0);
return QualityUtils.errorProbToQual(getErrorRate());
else
return (byte)fixedQual;
}

View File

@ -76,7 +76,7 @@ public class QuantizationInfo {
}
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution
for (int i = 0; i < qualHistogram.length; i++)
qualHistogram[i] = 0L;
@ -100,7 +100,7 @@ public class QuantizationInfo {
}
public void noQuantization() {
this.quantizationLevels = QualityUtils.MAX_QUAL_SCORE;
this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE;
for (int i = 0; i < this.quantizationLevels; i++)
quantizedQuals.set(i, (byte) i);
}
@ -124,7 +124,7 @@ public class QuantizationInfo {
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) {
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));

View File

@ -74,10 +74,10 @@ package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMUtils;
import org.apache.commons.math.optimization.fitting.GaussianFunction;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
@ -100,6 +100,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
"numMismatches <= numObservations"
})
public class RecalDatum {
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
private static final double UNINITIALIZED = -1.0;
/**
@ -337,7 +338,7 @@ public class RecalDatum {
// This is the old and busted point estimate approach:
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE);
}
//static final boolean DEBUG = false;
@ -369,7 +370,12 @@ public class RecalDatum {
return Qemp;
}
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
/**
* Quals above this value should be capped down to this value (because they are too high)
* in the base quality score recalibrator
*/
public final static byte MAX_GATK_USABLE_Q_SCORE = 40;
static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1];
static {
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
@ -379,7 +385,7 @@ public class RecalDatum {
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) {
double log10Prior = Math.log10(gaussian.value((double) i));
if ( Double.isInfinite(log10Prior) )
log10Prior = -Double.MAX_VALUE;
@ -388,7 +394,7 @@ public class RecalDatum {
}
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE);
//if ( DEBUG )
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
return log10QempPriorCache[difference];

View File

@ -263,11 +263,11 @@ public class RecalibrationReport {
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
*
* @param table the GATKReportTable containing the quantization mappings
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
* @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE
*/
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
final Byte[] quals = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
for ( int i = 0; i < table.getNumRows(); i++ ) {
final byte originalQual = (byte)i;
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);

View File

@ -119,6 +119,6 @@ public class QualityScoreCovariate implements RequiredCovariate {
@Override
public int maximumKeyValue() {
return QualityUtils.MAX_QUAL_SCORE;
return QualityUtils.MAX_SAM_QUAL_SCORE;
}
}

View File

@ -90,7 +90,7 @@ public class QualQuantizerUnitTest extends BaseTest {
this.exError = exError;
this.exTotal = exTotal;
this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1));
this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0);
this.exQual = QualityUtils.errorProbToQual(this.exErrorRate);
}
}

View File

@ -50,7 +50,6 @@ package org.broadinstitute.sting.utils.recalibration;
// the imports for unit testing.
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -58,7 +57,6 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -207,8 +205,8 @@ public class RecalDatumUnitTest extends BaseTest {
@Test
public void testlog10QempPrior() {
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
Assert.assertTrue(log10prior < 0.0);
Assert.assertFalse(Double.isInfinite(log10prior));
@ -219,7 +217,7 @@ public class RecalDatumUnitTest extends BaseTest {
final int Qrep = 20;
int maxQemp = -1;
double maxQempValue = -Double.MAX_VALUE;
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
if ( log10prior > maxQempValue ) {
maxQemp = Qemp;

View File

@ -67,7 +67,7 @@ public class RecalibrationReportUnitTest {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
final int qual = random.nextInt(QualityUtils.MAX_SAM_QUAL_SCORE);
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
}
@ -75,10 +75,10 @@ public class RecalibrationReportUnitTest {
public void testOutput() {
final int length = 100;
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_QUAL_SCORE + 1);
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_QUAL_SCORE + 1);
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_SAM_QUAL_SCORE + 1);
for (int i = 0; i<= QualityUtils.MAX_QUAL_SCORE; i++) {
for (int i = 0; i<= QualityUtils.MAX_SAM_QUAL_SCORE; i++) {
quals.add((byte) i);
counts.add(1L);
}

View File

@ -200,7 +200,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
final int mismatches = (Integer)table.get(key, "mismatches");
final int count = (Integer)table.get(key, "counts");
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
final int qual = QualityUtils.errorProbToQual(errorRate);
table.set(key, "qual", qual);
table.set(key, "errorrate", errorRate);
}

View File

@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -408,7 +409,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
return genotype.getGQ() >= minGenotypeQuality;
} else if ( genotype.hasLikelihoods() ) {
double log10gq = GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector());
return MathUtils.log10ProbabilityToPhredScale(log10gq) >= minGenotypeQuality;
return QualityUtils.phredScaleLog10ErrorRate(log10gq) >= minGenotypeQuality;
}
return minGenotypeQuality <= 0;

View File

@ -1251,6 +1251,16 @@ public class MathUtils {
return result <= 0.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
}
/**
* Checks that the result is a well-formed probability
*
* @param result a supposedly well-formed probability value
* @return true if result is really well formed
*/
public static boolean goodProbability(final double result) {
return result >= 0.0 && result <= 1.0 && ! Double.isInfinite(result) && ! Double.isNaN(result);
}
/**
* A utility class that computes on the fly average and standard deviation for a stream of numbers.
* The number of observations does not have to be known in advance, and can be also very big (so that
@ -1343,28 +1353,6 @@ public class MathUtils {
return Math.max(a, x2);
}
public static double phredScaleToProbability(byte q) {
return Math.pow(10, (-q) / 10.0);
}
public static double phredScaleToLog10Probability(byte q) {
return ((-q) / 10.0);
}
/**
* Returns the phred scaled value of probability p
*
* @param p probability (between 0 and 1).
* @return phred scaled probability of p
*/
public static byte probabilityToPhredScale(double p) {
return (byte) ((-10) * Math.log10(p));
}
public static double log10ProbabilityToPhredScale(double log10p) {
return (-10) * log10p;
}
/**
* Converts LN to LOG10
*
@ -1774,4 +1762,24 @@ public class MathUtils {
return values;
}
/**
* Compute in a numerical correct way the quanity log10(1-x)
*
* Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow
* in 1-x when x is very small
*
* @param x a positive double value between 0.0 and 1.0
* @return an estimate of log10(1-x)
*/
@Requires("x >= 0.0 && x <= 1.0")
@Ensures("result <= 0.0")
public static double log10OneMinusX(final double x) {
if ( x == 1.0 )
return Double.NEGATIVE_INFINITY;
else if ( x == 0.0 )
return 0.0;
else
return Math.log10(1 / x - 1) + Math.log10(x);
}
}

View File

@ -25,38 +25,50 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMUtils;
/**
* QualityUtils is a static class (no instantiation allowed!) with some utility methods for manipulating
* quality scores.
*
* @author Kiran Garimella
* @author Kiran Garimella, Mark DePristo
* @since Way back
*/
public class QualityUtils {
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
/**
* Maximum quality score that can be encoded in a SAM/BAM file
*/
public final static byte MAX_SAM_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 60; // bams containing quals above this value are extremely suspicious and we should warn the user
public final static byte MAX_GATK_USABLE_Q_SCORE = 40; // quals above this value should be capped down to this value (because they are too high)
private final static double RAW_MIN_PHRED_SCALED_QUAL = Math.log10(Double.MIN_VALUE);
protected final static double MIN_PHRED_SCALED_QUAL = -10.0 * RAW_MIN_PHRED_SCALED_QUAL;
/**
* bams containing quals above this value are extremely suspicious and we should warn the user
*/
public final static byte MAX_REASONABLE_Q_SCORE = 60;
/**
* The lowest quality score for a base that is considered reasonable for statistical analysis. This is
* because Q 6 => you stand a 25% of being right, which means all bases are equally likely
*/
public final static byte MIN_USABLE_Q_SCORE = 6;
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
/**
* Cached values for qual as byte calculations so they are very fast
*/
private static double qualToErrorProbCache[] = new double[256];
static {
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
}
private static double qualToErrorProbLog10Cache[] = new double[256];
static {
for (int i = 0; i < 256; i++) qualToErrorProbLog10Cache[i] = qualToErrorProbLog10Raw(i);
}
private static double qualToProbLog10Cache[] = new double[256];
static {
for (int i = 0; i < 256; i++) qualToProbLog10Cache[i] = qualToProbLog10Raw(i);
for (int i = 0; i < 256; i++) {
qualToErrorProbCache[i] = qualToErrorProb((double) i);
qualToProbLog10Cache[i] = Math.log10(1.0 - qualToErrorProbCache[i]);
}
}
/**
@ -64,111 +76,301 @@ public class QualityUtils {
*/
private QualityUtils() {}
// ----------------------------------------------------------------------
//
// These are all functions to convert a phred-scaled quality score to a probability
//
// ----------------------------------------------------------------------
/**
* Convert a quality score to a probability. This is the Phred-style
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
* Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
*
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* Because the input is a discretized byte value, this function uses a cache so is very efficient
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param qual a quality score (0-255)
* @return a probability (0.0-1.0)
*/
static public double qualToProb(byte qual) {
@Ensures("result >= 0.0 && result <= 1.0")
public static double qualToProb(final byte qual) {
return 1.0 - qualToErrorProb(qual);
}
static public double qualToProb(double qual) {
return 1.0 - Math.pow(10.0, qual/(-10.0));
/**
* Convert a phred-scaled quality score to its probability of being true (Q30 => 0.999)
*
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* Because the input is a double value, this function must call Math.pow so can be quite expensive
*
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
* @return a probability (0.0-1.0)
*/
@Requires("qual >= 0.0")
@Ensures("result >= 0.0 && result <= 1.0")
public static double qualToProb(final double qual) {
return 1.0 - qualToErrorProb(qual);
}
static private double qualToProbLog10Raw(int qual) {
return Math.log10(1.0 - qualToErrorProbRaw(qual));
}
static public double qualToProbLog10(byte qual) {
/**
* Convert a phred-scaled quality score to its log10 probability of being true (Q30 => log10(0.999))
*
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* Because the input is a double value, this function must call Math.pow so can be quite expensive
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
* @return a probability (0.0-1.0)
*/
@Ensures("result <= 0.0")
public static double qualToProbLog10(final byte qual) {
return qualToProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}
/**
* Convert a quality score to a probability of error. This is the Phred-style
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
* Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
*
* @param qual a quality score (0 - 255)
* @return a probability (0.0 - 1.0)
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* Because the input is a double value, this function must call Math.pow so can be quite expensive
*
* @param qual a phred-scaled quality score encoded as a double. Can be non-integer values (30.5)
* @return a probability (0.0-1.0)
*/
static private double qualToErrorProbRaw(int qual) {
return qualToErrorProb((double) qual);
}
@Requires("qual >= 0.0")
@Ensures("result >= 0.0 && result <= 1.0")
public static double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual/-10.0);
return Math.pow(10.0, qual / -10.0);
}
static public double qualToErrorProb(byte qual) {
/**
* Convert a phred-scaled quality score to its probability of being wrong (Q30 => 0.001)
*
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* Because the input is a byte value, this function uses a cache so is very efficient
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param qual a phred-scaled quality score encoded as a byte
* @return a probability (0.0-1.0)
*/
@Ensures("result >= 0.0 && result <= 1.0")
public static double qualToErrorProb(final byte qual) {
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}
static private double qualToErrorProbLog10Raw(int qual) {
return ((double) qual)/-10.0;
}
static public double qualToErrorProbLog10(byte qual) {
return qualToErrorProbLog10Cache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}
static public double qualToErrorProbLog10(final double qual) {
return qual/-10.0;
/**
* Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
*
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* The calculation is extremely efficient
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param qual a phred-scaled quality score encoded as a byte
* @return a probability (0.0-1.0)
*/
@Ensures("result <= 0.0")
public static double qualToErrorProbLog10(final byte qual) {
return qualToErrorProbLog10((double)(qual & 0xFF));
}
/**
* Convert a probability to a quality score. Note, this is capped at Q40.
* Convert a phred-scaled quality score to its log10 probability of being wrong (Q30 => log10(0.001))
*
* @param prob a probability (0.0-1.0)
* @return a quality score (0-40)
* This is the Phred-style conversion, *not* the Illumina-style conversion.
*
* The calculation is extremely efficient
*
* @param qual a phred-scaled quality score encoded as a double
* @return a probability (0.0-1.0)
*/
static public byte probToQual(double prob) {
return probToQual(prob, MIN_REASONABLE_ERROR);
//return (byte) Math.round(-10.0*Math.log10(1.0 - prob + 0.0001));
@Ensures("result <= 0.0")
public static double qualToErrorProbLog10(final double qual) {
return qual / -10.0;
}
// ----------------------------------------------------------------------
//
// Functions to convert a probability to a phred-scaled quality score
//
// ----------------------------------------------------------------------
/**
* Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
*
* Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
* and by 1 at the low-end.
*
* @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
* @return a quality score (0-MAX_SAM_QUAL_SCORE)
*/
@Requires("errorRate >= 0.0 && errorRate <= 1.0")
public static byte errorProbToQual(final double errorRate) {
return errorProbToQual(errorRate, MAX_SAM_QUAL_SCORE);
}
/**
* Convert a probability to a quality score. Note, this is capped at a quality score which is determined by _eps_.
* Convert a probability of being wrong to a phred-scaled quality score (0.01 => 20).
*
* @param prob a probability (0.0-1.0)
* @param eps min probabilty allowed (0.0-1.0)
* @return a quality score (0-255)
* Note, this function caps the resulting quality score by the public static value MIN_REASONABLE_ERROR
* and by 1 at the low-end.
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param errorRate a probability (0.0-1.0) of being wrong (i.e., 0.01 is 1% change of being wrong)
* @return a quality score (0-maxQual)
*/
static public byte probToQual(double prob, double eps) {
double lp = Math.round(-10.0*Math.log10(1.0 - prob + eps));
//System.out.printf("LP is %f, byte is %d%n", lp, b);
return boundQual((int)lp);
}
static public double phredScaleCorrectRate(double trueRate) {
return phredScaleErrorRate(1-trueRate);
}
static public double phredScaleErrorRate(double errorRate) {
return Math.abs(-10.0*Math.log10(errorRate));
@Requires("errorRate >= 0.0 && errorRate <= 1.0")
public static byte errorProbToQual(final double errorRate, final byte maxQual) {
final double d = Math.round(-10.0*Math.log10(errorRate));
return boundQual((int)d, maxQual);
}
/**
* Return a quality score, capped at max qual.
*
* @param qual the uncapped quality score
* @return the capped quality score
* @see #errorProbToQual(double, byte) with proper conversion of maxQual integer to a byte
*/
static public byte boundQual(int qual) {
return boundQual(qual, MAX_QUAL_SCORE);
@Requires("maxQual >= 0 && maxQual < 255")
public static byte errorProbToQual(final double prob, final int maxQual) {
return errorProbToQual(prob, (byte)(maxQual & 0xFF));
}
/**
* Returns an integer quality score bounded by 1 - maxQual.
* Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
*
* @param qual the quality score
* @param maxQual the maximum quality
* @return the integer betwen 1 and maxqual.
* Note, this function caps the resulting quality score by the public static value MAX_SAM_QUAL_SCORE
* and by 1 at the low-end.
*
* @param prob a probability (0.0-1.0) of being right
* @return a quality score (0-MAX_SAM_QUAL_SCORE)
*/
static public byte boundQual(int qual, byte maxQual) {
return (byte) Math.max(Math.min(qual, maxQual), 1);
@Requires("prob >= 0.0 && prob <= 1.0")
public static byte trueProbToQual(final double prob) {
return trueProbToQual(prob, MAX_SAM_QUAL_SCORE);
}
/**
* Convert a probability of being right to a phred-scaled quality score (0.99 => 20).
*
* Note, this function caps the resulting quality score by the min probability allowed (EPS).
* So for example, if prob is 1e-6, which would imply a Q-score of 60, and EPS is 1e-4,
* the result of this function is actually Q40.
*
* Note that the resulting quality score, regardless of EPS, is capped by MAX_SAM_QUAL_SCORE and
* bounded on the low-side by 1.
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param prob a probability (0.0-1.0) of being right
* @param maxQual the maximum quality score we are allowed to emit here, regardless of the error rate
* @return a phred-scaled quality score (0-maxQualScore) as a byte
*/
@Requires({
"prob >= 0.0 && prob <= 1.0"
})
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
public static byte trueProbToQual(final double prob, final byte maxQual) {
final double lp = Math.round(-10.0*MathUtils.log10OneMinusX(prob));
return boundQual((int)lp, maxQual);
}
/**
* @see #trueProbToQual(double, byte) with proper conversion of maxQual to a byte
*/
@Requires("maxQual >= 0 && maxQual < 255")
public static byte trueProbToQual(final double prob, final int maxQual) {
return trueProbToQual(prob, (byte)(maxQual & 0xFF));
}
/**
* Convert a probability of being right to a phred-scaled quality score of being wrong as a double
*
* This is a very generic method, that simply computes a phred-scaled double quality
* score given an error rate. It has the same precision as a normal double operation
*
* @param trueRate the probability of being right (0.0-1.0)
* @return a phred-scaled version of the error rate implied by trueRate
*/
@Requires("MathUtils.goodProbability(trueRate)")
@Ensures("result >= 0.0")
public static double phredScaleCorrectRate(final double trueRate) {
return phredScaleLog10ErrorRate(MathUtils.log10OneMinusX(trueRate));
}
/**
* Convert a probability of being wrong to a phred-scaled quality score of being wrong as a double
*
* This is a very generic method, that simply computes a phred-scaled double quality
* score given an error rate. It has the same precision as a normal double operation
*
* @param errorRate the probability of being wrong (0.0-1.0)
* @return a phred-scaled version of the error rate
*/
@Requires("MathUtils.goodProbability(errorRate)")
@Ensures("result >= 0.0")
public static double phredScaleErrorRate(final double errorRate) {
return phredScaleLog10ErrorRate(Math.log10(errorRate));
}
/**
* Convert a log10 probability of being wrong to a phred-scaled quality score of being wrong as a double
*
* This is a very generic method, that simply computes a phred-scaled double quality
* score given an error rate. It has the same precision as a normal double operation
*
* @param errorRateLog10 the log10 probability of being wrong (0.0-1.0)
* @return a phred-scaled version of the error rate
*/
@Ensures("result >= 0.0")
public static double phredScaleLog10ErrorRate(final double errorRateLog10) {
return -10.0 * Math.max(errorRateLog10, RAW_MIN_PHRED_SCALED_QUAL);
}
// ----------------------------------------------------------------------
//
// Routines to bound a quality score to a reasonable range
//
// ----------------------------------------------------------------------
/**
* Return a quality score that bounds qual by MAX_SAM_QUAL_SCORE and 1
*
* @param qual the uncapped quality score as an integer
* @return the bounded quality score
*/
@Requires("qual >= 0")
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (MAX_SAM_QUAL_SCORE & 0xFF)")
public static byte boundQual(int qual) {
return boundQual(qual, MAX_SAM_QUAL_SCORE);
}
/**
* Return a quality score that bounds qual by maxQual and 1
*
* WARNING -- because this function takes a byte for maxQual, you must be careful in converting
* integers to byte. The appropriate way to do this is ((byte)(myInt & 0xFF))
*
* @param qual the uncapped quality score as an integer
* @param maxQual the maximum quality score, must be less < 255
* @return the bounded quality score
*/
@Requires({"qual >= 0"})
@Ensures("(result & 0xFF) >= 1 && (result & 0xFF) <= (maxQual & 0xFF)")
public static byte boundQual(final int qual, final byte maxQual) {
return (byte) (Math.max(Math.min(qual, maxQual & 0xFF), 1) & 0xFF);
}
}

View File

@ -94,8 +94,7 @@ public class DupUtils {
Arrays.sort(probs);
double normalizedP = Math.pow(10, bestProb) / sumProbs;
double eps = Math.pow(10, -maxQScore/10.0);
byte qual = QualityUtils.probToQual(normalizedP, eps);
byte qual = QualityUtils.trueProbToQual(normalizedP, maxQScore);
// if ( false ) {
// System.out.printf("Best base is %s %.8f%n", bestBase, bestProb);
// System.out.printf("2nd base is %.8f%n", probs[1]);

View File

@ -129,7 +129,7 @@ public class LIBSPerformance extends CommandLineProgram {
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
// final byte[] quals = new byte[readLength];
// for ( int i = 0; i < readLength; i++ )
// quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
// quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
// read.setBaseQualities(quals);
// read.setCigarString(cigar);
//

View File

@ -34,6 +34,7 @@ package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
@ -42,10 +43,64 @@ import java.util.*;
* Basic unit test for QualityUtils class
*/
public class QualityUtilsUnitTest extends BaseTest {
final private static double TOLERANCE = 1e-9;
@BeforeClass
public void init() {
}
@DataProvider(name = "QualTest")
public Object[][] makeMyDataProvider() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( int qual = 0; qual < 255; qual++ ) {
tests.add(new Object[]{(byte)(qual & 0xFF), Math.pow(10.0, ((double)qual)/-10.0)});
}
return tests.toArray(new Object[][]{});
}
/**
* Example testng test using MyDataProvider
*/
@Test(dataProvider = "QualTest")
public void testMyData(final byte qual, final double errorRate) {
final double trueRate = 1 - errorRate;
final double actualErrorRate = QualityUtils.qualToErrorProb(qual);
Assert.assertEquals(actualErrorRate, errorRate, TOLERANCE);
final double actualTrueRate = QualityUtils.qualToProb(qual);
Assert.assertEquals(actualTrueRate, trueRate, TOLERANCE);
// log10 tests
final double actualLog10ErrorRate = QualityUtils.qualToErrorProbLog10(qual);
Assert.assertEquals(actualLog10ErrorRate, Math.log10(errorRate), TOLERANCE);
final double actualLog10TrueRate = QualityUtils.qualToProbLog10(qual);
Assert.assertEquals(actualLog10TrueRate, Math.log10(trueRate), TOLERANCE);
// test that we can convert our error rates to quals, accounting for boundaries
final int expectedQual = Math.max(Math.min(qual & 0xFF, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
final byte actualQual = QualityUtils.trueProbToQual(trueRate);
Assert.assertEquals(actualQual, expectedQual & 0xFF);
final byte actualQualFromErrorRate = QualityUtils.errorProbToQual(errorRate);
Assert.assertEquals(actualQualFromErrorRate, expectedQual & 0xFF);
for ( int maxQual = 10; maxQual < QualityUtils.MAX_SAM_QUAL_SCORE; maxQual++ ) {
final byte maxAsByte = (byte)(maxQual & 0xFF);
final byte expectedQual2 = (byte)(Math.max(Math.min(qual & 0xFF, maxQual), 1) & 0xFF);
final byte actualQual2 = QualityUtils.trueProbToQual(trueRate, maxAsByte);
Assert.assertEquals(actualQual2, expectedQual2, "Failed with max " + maxQual);
final byte actualQualFromErrorRate2 = QualityUtils.errorProbToQual(errorRate, maxAsByte);
Assert.assertEquals(actualQualFromErrorRate2, expectedQual2, "Failed with max " + maxQual);
// test the integer routines
final byte actualQualInt2 = QualityUtils.trueProbToQual(trueRate, maxQual);
Assert.assertEquals(actualQualInt2, expectedQual2, "Failed with max " + maxQual);
final byte actualQualFromErrorRateInt2 = QualityUtils.errorProbToQual(errorRate, maxQual);
Assert.assertEquals(actualQualFromErrorRateInt2, expectedQual2, "Failed with max " + maxQual);
}
}
@Test
public void testQualCaches() {
Assert.assertEquals(QualityUtils.qualToErrorProb((byte) 20), 0.01, 1e-6);
@ -63,4 +118,59 @@ public class QualityUtilsUnitTest extends BaseTest {
Assert.assertEquals(QualityUtils.qualToProb((byte) 40), 0.9999, 1e-6);
Assert.assertEquals(QualityUtils.qualToProbLog10((byte) 40), -4.34316198e-5, 1e-6);
}
@Test()
public void testBoundingDefault() {
for ( int qual = 0; qual < 1000; qual++ ) {
final byte expected = (byte)Math.max(Math.min(qual, QualityUtils.MAX_SAM_QUAL_SCORE), 1);
Assert.assertEquals(QualityUtils.boundQual(qual), expected);
}
}
@Test()
public void testBoundingWithMax() {
for ( int max = 10; max < 255; max += 50 ) {
for ( int qual = 0; qual < 1000; qual++ ) {
final int expected = Math.max(Math.min(qual, max), 1);
Assert.assertEquals(QualityUtils.boundQual(qual, (byte)(max & 0xFF)) & 0xFF, expected & 0xFF, "qual " + qual + " max " + max);
}
}
}
@DataProvider(name = "PhredScaleDoubleOps")
public Object[][] makePhredDoubleTest() {
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{0.0, -10 * Math.log10(Double.MIN_VALUE)});
tests.add(new Object[]{1.0, 0.0});
for ( int pow = 1; pow < 20; pow++ ) {
tests.add(new Object[]{Math.pow(10.0, -1.0 * pow), pow * 10});
tests.add(new Object[]{Math.pow(10.0, -1.5 * pow), pow * 15});
}
return tests.toArray(new Object[][]{});
}
@Test()
public void testQualToErrorProbDouble() {
for ( double qual = 3.0; qual < 255.0; qual += 0.1 ) {
final double expected = Math.pow(10.0, qual / -10.0);
Assert.assertEquals(QualityUtils.qualToErrorProb(qual), expected, TOLERANCE, "failed qual->error prob for double qual " + qual);
}
}
@Test(dataProvider = "PhredScaleDoubleOps")
public void testPhredScaleDoubleOps(final double errorRate, final double expectedPhredScaled) {
final double actualError = QualityUtils.phredScaleErrorRate(errorRate);
Assert.assertEquals(actualError, expectedPhredScaled, TOLERANCE);
final double trueRate = 1 - errorRate;
final double actualTrue = QualityUtils.phredScaleCorrectRate(trueRate);
if ( trueRate == 1.0 ) {
Assert.assertEquals(actualTrue, QualityUtils.MIN_PHRED_SCALED_QUAL);
} else {
final double tol = errorRate < 1e-10 ? 10.0 : 1e-3;
Assert.assertEquals(actualTrue, expectedPhredScaled, tol);
}
}
}

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;
@ -63,7 +62,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
final byte[] quals = new byte[readLength];
for ( int i = 0; i < readLength; i++ )
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
read.setBaseQualities(quals);
read.setCigarString(cigar);
reads.add(read);

View File

@ -141,7 +141,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
final byte[] quals = new byte[readLength];
for ( int i = 0; i < readLength; i++ )
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
read.setBaseQualities(quals);
read.setCigarString(cigarString);
return read;

View File

@ -315,7 +315,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
read.setReadBases(("TT" + eventBases + "A").getBytes());
final byte[] quals = new byte[readLength];
for ( int i = 0; i < readLength; i++ )
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
read.setBaseQualities(quals);
read.setCigarString(cigar);