diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AccuracyDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AccuracyDatum.java
new file mode 100644
index 000000000..b66a81f34
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AccuracyDatum.java
@@ -0,0 +1,52 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.MathUtils;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Short one line description of the walker.
+ *
+ *
[Long description of the walker]
+ *
+ *
+ * Input [Description of the Input]
+ *
+ * Output [Description of the Output]
+ *
+ * Examples
+ *
+ * java
+ * -jar GenomeAnalysisTK.jar
+ * -T [walker name]
+ *
+ *
+ * @author Mauricio Carneiro
+ * @since 4/17/12
+ */
+public class AccuracyDatum extends RecalDatum {
+ private final List accuracy = new LinkedList();
+ private final List reportedQualities = new LinkedList();
+
+ public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
+ super(recalDatum);
+ accuracy.add(calculateAccuracy(recalDatum, originalQuality));
+ reportedQualities.add(originalQuality);
+ }
+
+ public void combine(final RecalDatum recalDatum, final byte originalQuality) {
+ this.combine(recalDatum);
+ accuracy.add(calculateAccuracy(recalDatum, originalQuality));
+ reportedQualities.add(originalQuality);
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
+ }
+
+ private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
+ return recalDatum.getEmpiricalQuality() - originalQuality;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
index bcbda4b20..2b48e5871 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
@@ -52,7 +52,7 @@ public class BQSRKeyManager {
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
int nBits = required.numberOfBits(); // number of bits used by this covariate
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
- this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
+ this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
nRequiredBits += nBits;
}
@@ -184,7 +184,7 @@ public class BQSRKeyManager {
* @return an object array with the values for each key
*/
public List keySetFrom(BitSet key) {
- List objectKeys = new ArrayList();
+ List objectKeys = new LinkedList();
for (RequiredCovariateInfo info : requiredCovariates) {
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
@@ -286,7 +286,7 @@ public class BQSRKeyManager {
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
- RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
+ RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
this.bitsBefore = bitsBefore;
this.mask = mask;
this.covariate = covariate;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
index 69461ed0e..c7c281943 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -47,9 +47,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize;
private int deletionsContextSize;
- private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
-// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
-
private byte LOW_QUAL_TAIL;
// Initialize any member variables using the command-line arguments passed to the walkers
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
index 00d3b650c..ebf90ebfd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
@@ -14,9 +14,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class CovariateValues {
- private BitSet[] mismatches;
- private BitSet[] insertions;
- private BitSet[] deletions;
+ private final BitSet[] mismatches;
+ private final BitSet[] insertions;
+ private final BitSet[] deletions;
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
similarity index 65%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
rename to public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
index 39807283a..b3ea88d58 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
@@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
-import java.util.List;
-
/*
* Copyright (c) 2010 The Broad Institute
*
@@ -38,10 +36,13 @@ import java.util.List;
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
-public class RecalDatumOptimized {
+public class Datum {
+
+ long numObservations; // number of bases seen in total
+ long numMismatches; // number of bases seen that didn't match the reference
+
+ private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
- protected long numObservations; // number of bases seen in total
- protected long numMismatches; // number of bases seen that didn't match the reference
//---------------------------------------------------------------------------------------------------------------
//
@@ -49,67 +50,43 @@ public class RecalDatumOptimized {
//
//---------------------------------------------------------------------------------------------------------------
- public RecalDatumOptimized() {
+ public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
- public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
- numObservations = _numObservations;
- numMismatches = _numMismatches;
- }
-
- public RecalDatumOptimized(final RecalDatumOptimized copy) {
- this.numObservations = copy.numObservations;
- this.numMismatches = copy.numMismatches;
- }
-
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
- public synchronized final void increment(final long incObservations, final long incMismatches) {
+ synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
- public synchronized final void increment(final RecalDatumOptimized other) {
- increment(other.numObservations, other.numMismatches);
- }
-
- public synchronized final void increment(final List data) {
- for (RecalDatumOptimized other : data) {
- this.increment(other);
- }
- }
-
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
- public final double empiricalQualDouble(final int smoothing, final double maxQual) {
- final double doubleMismatches = (double) (numMismatches + smoothing);
- final double doubleObservations = (double) (numObservations + smoothing);
+ double empiricalQualDouble() {
+ final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
+ final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
- return Math.min(empiricalQual, maxQual);
+ return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
}
- public final byte empiricalQualByte(final int smoothing) {
- final double doubleMismatches = (double) (numMismatches + smoothing);
- final double doubleObservations = (double) (numObservations + smoothing);
- return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
- }
-
- public final byte empiricalQualByte() {
- return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
+ byte empiricalQualByte() {
+ final double doubleMismatches = (double) (numMismatches);
+ final double doubleObservations = (double) (numObservations);
+ return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
- public final String toString() {
+ public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java
index 4c53dcca5..6d004edb1 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java
@@ -7,8 +7,8 @@ public enum EventType {
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
- public int index;
- public String representation;
+ public final int index;
+ private final String representation;
private EventType(int index, String representation) {
this.index = index;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java
index afe847583..9c91a1874 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java
@@ -19,9 +19,9 @@ import java.util.Map;
public class QuantizationInfo {
private List quantizedQuals;
private List empiricalQualCounts;
- int quantizationLevels;
+ private int quantizationLevels;
- public QuantizationInfo(List quantizedQuals, List empiricalQualCounts, int quantizationLevels) {
+ private QuantizationInfo(List quantizedQuals, List empiricalQualCounts, int quantizationLevels) {
this.quantizedQuals = quantizedQuals;
this.empiricalQualCounts = empiricalQualCounts;
this.quantizationLevels = quantizationLevels;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java
index f87986b47..fc4445b22 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java
@@ -13,9 +13,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class ReadCovariates {
- private BitSet[][] mismatchesKeySet;
- private BitSet[][] insertionsKeySet;
- private BitSet[][] deletionsKeySet;
+ private final BitSet[][] mismatchesKeySet;
+ private final BitSet[][] insertionsKeySet;
+ private final BitSet[][] deletionsKeySet;
private int nextCovariateIndex;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
index ac80e2017..cedff0a80 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -221,7 +220,7 @@ public class RecalDataManager {
logger.info("");
}
- public static List generateReportTables(Map> keysAndTablesMap) {
+ private static List generateReportTables(Map> keysAndTablesMap) {
List result = new LinkedList();
int tableIndex = 0;
@@ -349,7 +348,7 @@ public class RecalDataManager {
* @param read The SAMRecord to parse
* @return whether or not this read should be skipped
*/
- public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
+ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
@@ -378,99 +377,10 @@ public class RecalDataManager {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else
- return false; // otherwise, just skip the read
+ return true; // otherwise, just skip the read
}
}
- return true;
- }
-
- /**
- * Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
- * This method doesn't add the inconsistent tag to the read like parseColorSpace does
- *
- * @param read The SAMRecord to parse
- * @param originalQualScores The array of original quality scores to modify during the correction
- * @param solidRecalMode Which mode of solid recalibration to apply
- * @param refBases The reference for this read
- * @return A new array of quality scores that have been ref bias corrected
- */
- public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
-
- final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
- if (attr != null) {
- byte[] colorSpace;
- if (attr instanceof String) {
- colorSpace = ((String) attr).getBytes();
- }
- else {
- throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
- }
-
- // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
- byte[] readBases = read.getReadBases();
- final byte[] colorImpliedBases = readBases.clone();
- byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
- if (read.getReadNegativeStrandFlag()) {
- readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
- refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
- }
- final int[] inconsistency = new int[readBases.length];
- byte prevBase = colorSpace[0]; // The sentinel
- for (int iii = 0; iii < readBases.length; iii++) {
- final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
- colorImpliedBases[iii] = thisBase;
- inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
- prevBase = readBases[iii];
- }
-
- // Now that we have the inconsistency array apply the desired correction to the inconsistent bases
- if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
- final boolean setBaseN = false;
- originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
- }
- else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
- final boolean setBaseN = true;
- originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
- }
- else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
- solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
- }
-
- }
- else {
- throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
- " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
- }
-
- return originalQualScores;
- }
-
- public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
- if (ReadUtils.isSOLiDRead(read)) {
- final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
- if (attr != null) {
- byte[] colorSpace;
- if (attr instanceof String) {
- colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
- }
- else {
- throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
- }
-
- for (byte color : colorSpace) {
- if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
- return true; // There is a bad color in this SOLiD read and the user wants to skip over it
- }
- }
-
- }
- else {
- throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
- " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
- }
- }
-
- return false; // There aren't any color no calls in this SOLiD read
+ return false;
}
/**
@@ -625,16 +535,16 @@ public class RecalDataManager {
* @param offset The offset in the read at which to check
* @return Returns true if the base was inconsistent with the color space
*/
- public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
+ public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
if (attr != null) {
final byte[] inconsistency = (byte[]) attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if (read.getReadNegativeStrandFlag()) { // Negative direction
- return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
+ return inconsistency[inconsistency.length - offset - 1] == (byte) 0;
}
else { // Forward direction
- return inconsistency[offset] != (byte) 0;
+ return inconsistency[offset] == (byte) 0;
}
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
@@ -654,7 +564,7 @@ public class RecalDataManager {
}
else { // No inconsistency array, so nothing is inconsistent
- return false;
+ return true;
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
index 0b66bb182..d232fde81 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
@@ -33,12 +33,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
-public class RecalDatum extends RecalDatumOptimized {
+public class RecalDatum extends Datum {
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
- private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
@@ -50,7 +49,7 @@ public class RecalDatum extends RecalDatumOptimized {
numObservations = 0L;
numMismatches = 0L;
estimatedQReported = 0.0;
- empiricalQuality = 0.0;
+ empiricalQuality = -1.0;
}
public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
@@ -67,60 +66,52 @@ public class RecalDatum extends RecalDatumOptimized {
this.empiricalQuality = copy.empiricalQuality;
}
- //---------------------------------------------------------------------------------------------------------------
- //
- // increment methods
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public final void combine(final RecalDatum other) {
+ public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
this.increment(other.numObservations, other.numMismatches);
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
+ this.empiricalQuality = -1.0; // reset the empirical quality calculation so we never have a wrongly calculated empirical quality stored
}
- //---------------------------------------------------------------------------------------------------------------
- //
- // methods to derive empirical quality score
- //
- //---------------------------------------------------------------------------------------------------------------
-
- public final void calcCombinedEmpiricalQuality(final int maxQual) {
- this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
+ public final void calcCombinedEmpiricalQuality() {
+ this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
}
- //---------------------------------------------------------------------------------------------------------------
- //
- // misc. methods
- //
- //---------------------------------------------------------------------------------------------------------------
-
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final double getEmpiricalQuality() {
+ if (empiricalQuality < 0)
+ calcCombinedEmpiricalQuality();
return empiricalQuality;
}
- private double calcExpectedErrors() {
- return (double) this.numObservations * qualToErrorProb(estimatedQReported);
- }
-
- private double qualToErrorProb(final double qual) {
- return Math.pow(10.0, qual / -10.0);
- }
-
/**
* Makes a hard copy of the recal datum element
*
* @return a new recal datum object with the same contents of this datum.
*/
- protected RecalDatum copy() {
+ public RecalDatum copy() {
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
}
+
+ @Override
+ public String toString() {
+ return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
+ }
+
+
+ private double calcExpectedErrors() {
+ return (double) this.numObservations * qualToErrorProb(estimatedQReported);
+ }
+
+ private double qualToErrorProb(final double qual) {
+ return Math.pow(10.0, qual / -10.0);
+ }
+
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index 07cb8d7a8..4a695ecb6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
-import java.io.PrintStream;
+import java.io.File;
import java.util.Collections;
import java.util.List;
@@ -62,7 +62,7 @@ public class RecalibrationArgumentCollection {
*/
@Gather(BQSRGatherer.class)
@Output
- public PrintStream RECAL_FILE;
+ public File RECAL_FILE;
/**
* List all implemented covariates.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java
index c434cc96b..19c04361b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java
@@ -18,13 +18,11 @@ import java.util.*;
*/
public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
- private LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager
- private ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation
+ private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager
+ private final ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation
- GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
- RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
-
- private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
+ private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
+ private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE);
@@ -53,6 +51,7 @@ public class RecalibrationReport {
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
+ final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable);
@@ -292,7 +291,7 @@ public class RecalibrationReport {
public void calculateEmpiricalAndQuantizedQualities() {
for (Map table : keysAndTablesMap.values())
for (RecalDatum datum : table.values())
- datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
+ datum.calcCombinedEmpiricalQuality();
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index 3a5b07e58..2badca44c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -51,6 +51,7 @@ public class BaseRecalibration {
* Constructor using a GATK Report file
*
* @param RECAL_FILE a GATK Report file containing the recalibration information
+ * @param quantizationLevels number of bins to quantize the quality scores
*/
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
@@ -80,7 +81,7 @@ public class BaseRecalibration {
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
byte qualityScore = originalQuals[offset];
- if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
+ if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java
index 636d4ffb8..286b08a2c 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java
@@ -53,6 +53,17 @@ public class BQSRKeyManagerUnitTest {
createReadAndTest(covariates, nRequired);
}
+ @Test(enabled = true)
+ public void testOneCovariateWithOptionalCovariates() {
+ final int nRequired = 1;
+ final ArrayList covariates = new ArrayList(4);
+ covariates.add(new ReadGroupCovariate());
+ covariates.add(new QualityScoreCovariate());
+ covariates.add(new CycleCovariate());
+ covariates.add(new ContextCovariate());
+ createReadAndTest(covariates, nRequired);
+ }
+
private void createReadAndTest(List covariates, int nRequired) {
int readLength = 1000;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");