BQSR bug triage #3

* fixed context covariate famous "off by one" error
   * reduced maximum quality score to Q50 (following Eric/Ryan's suggestion)
   * remove context downsampling in BQSR R script
This commit is contained in:
Mauricio Carneiro 2012-04-19 12:21:12 -04:00
parent df5dd841af
commit 0f8c77391d
8 changed files with 23 additions and 28 deletions

View File

@ -125,8 +125,8 @@ public class ContextCovariate implements StandardCovariate {
*/
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
BitSet result = null;
if (offset >= contextSize) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (offset - contextSize + 1 >= 0) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
if (!context.contains("N"))
result = BitSetUtils.bitSetFrom(context);
}

View File

@ -76,7 +76,7 @@ public class Datum {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
@ -152,7 +151,7 @@ public class RecalDataManager {
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
for (Covariate covariate : requiredCovariates) {
requiredCovariatesToAdd.add(covariate);
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
}

View File

@ -74,7 +74,7 @@ public class RecalDatum extends Datum {
}
public final void calcCombinedEmpiricalQuality() {
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
@ -102,7 +102,7 @@ public class RecalDatum extends Datum {
@Override
public String toString() {
return String.format("%d,%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()), (byte) Math.floor(getEstimatedQReported()));
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}

View File

@ -9,6 +9,7 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
public final static byte MAX_RECALIBRATED_Q_SCORE = 50;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);

View File

@ -68,9 +68,9 @@ public class BaseRecalibration {
/**
* This constructor only exists for testing purposes.
*
* @param quantizationInfo
* @param keysAndTablesMap
* @param requestedCovariates
* @param quantizationInfo the quantization info object
* @param keysAndTablesMap the map of key managers and recalibration tables
* @param requestedCovariates the list of requested covariates
*/
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
this.quantizationInfo = quantizationInfo;
@ -179,9 +179,8 @@ public class BaseRecalibration {
}
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
}

View File

@ -39,8 +39,8 @@ public class ContextCovariateUnitTest {
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i = 0; i < values.length; i++) {
String expectedContext = null;
if (i >= contextSize) {
String context = bases.substring(i - contextSize, i);
if (i - contextSize + 1 >= 0) {
String context = bases.substring(i - contextSize + 1, i + 1);
if (!context.contains("N"))
expectedContext = context;
}

View File

@ -24,10 +24,6 @@ public class BaseRecalibrationUnitTest {
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap;
private BQSRKeyManager rgKeyManager;
private BQSRKeyManager qsKeyManager;
private BQSRKeyManager cvKeyManager;
private ReadGroupCovariate rgCovariate;
private QualityScoreCovariate qsCovariate;
private ContextCovariate cxCovariate;
@ -60,13 +56,13 @@ public class BaseRecalibrationUnitTest {
rgCovariate = new ReadGroupCovariate();
rgCovariate.initialize(RAC);
requiredCovariates.add(rgCovariate);
rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(rgKeyManager, new HashMap<BitSet, RecalDatum>());
qsCovariate = new QualityScoreCovariate();
qsCovariate.initialize(RAC);
requiredCovariates.add(qsCovariate);
qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(qsKeyManager, new HashMap<BitSet, RecalDatum>());
cxCovariate = new ContextCovariate();
@ -75,7 +71,7 @@ public class BaseRecalibrationUnitTest {
cyCovariate = new CycleCovariate();
cyCovariate.initialize(RAC);
optionalCovariates.add(cyCovariate);
cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
keysAndTablesMap.put(cvKeyManager, new HashMap<BitSet, RecalDatum>());
@ -108,7 +104,7 @@ public class BaseRecalibrationUnitTest {
updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum);
}
}
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_QUAL_SCORE);
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
List<Byte> quantizedQuals = new ArrayList<Byte>();
List<Long> qualCounts = new ArrayList<Long>();
@ -179,7 +175,7 @@ public class BaseRecalibrationUnitTest {
BitSet key = entry.getKey();
RecalDatum datum = entry.getValue();
List<Object> keySet = keyManager.keySetFrom(key);
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum));
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported());
}
System.out.println();
}
@ -187,9 +183,9 @@ public class BaseRecalibrationUnitTest {
}
private static void printNestedHashMap(Map<Object,Object> table, String output) {
private static void printNestedHashMap(Map table, String output) {
for (Object key : table.keySet()) {
String ret = "";
String ret;
if (output.isEmpty())
ret = "" + key;
else
@ -199,7 +195,7 @@ public class BaseRecalibrationUnitTest {
if (next instanceof org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum)
System.out.println(ret + " => " + next);
else
printNestedHashMap((Map<Object, Object>) next, "" + ret);
printNestedHashMap((Map) next, "" + ret);
}
}
@ -269,7 +265,7 @@ public class BaseRecalibrationUnitTest {
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE);
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
// Verbose printouts used to validate with old recalibrator
//if(key.contains(null)) {
@ -289,6 +285,6 @@ public class BaseRecalibrationUnitTest {
final double doubleMismatches = (double) (errors + smoothing);
final double doubleObservations = (double) ( observations + smoothing );
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(QualityUtils.MAX_QUAL_SCORE, empiricalQual);
return Math.min(QualityUtils.MAX_RECALIBRATED_Q_SCORE, empiricalQual);
}
}