BitSets keys to lower BQSR's memory footprint

Infrastructure: * Generic BitSet implementation with any precision (up to long) * Two's complement implementation of the bit set handles negative numbers (cycle covariate) * Memoized implementation of the BitSet utils for better performance. * All exponents are now calculated with bit shifts, fixing numerical precision issues with the double Math.pow. * Replace log/sqrt with bitwise logic to get rid of numerical issues BQSR: * All covariates output BitSets and have the functionality to decode them back into Object values. * Covariates are responsible for determining the size of the key they will use (number of bits). * Generalized KeyManager implementation combines any arbitrary number of covariates into one bitset key with event type * No more NestedHashMaps. Single key system now fits in one hash to reduce hash table objects overhead Tests: * Unit tests added to every method of BitSetUtils * Unit tests added to the generalized key system infrastructure of BQSRv2 (KeyManager) * Unit tests added to the cycle and context covariates (will add unit tests to all covariates)
2012-03-05 17:37:34 -05:00 · 2012-03-05 17:37:34 -05:00 · ca11ab39e7
parent e86ce8f3d6
commit ca11ab39e7
19 changed files with 868 additions and 457 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@ -26,7 +26,7 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.BitSetUtils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -45,6 +45,9 @@ public class ContextCovariate implements StandardCovariate {
    private int insertionsContextSize;
    private int deletionsContextSize;
    private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
    protected final String NO_CONTEXT_VALUE = "N";                          // protected so we can UNIT TEST it
    // Initialize any member variables using the command-line arguments passed to the walkers
    @Override
    public void initialize(final RecalibrationArgumentCollection RAC) {
@ -89,23 +92,34 @@ public class ContextCovariate implements StandardCovariate {
        return str;
    }
    @Override
    public String keyFromBitSet(BitSet key) {
        if (key.equals(NO_CONTEXT_BITSET))
            return NO_CONTEXT_VALUE;
        return BitSetUtils.dnaFrom(key);
    }
    @Override
    public int numberOfBits() {
        return Long.bitCount(-1L);
    }
    /**
-     * calculates the context of a base independent of the covariate mode
+     * calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
     *
     * @param bases       the bases in the read to build the context from
     * @param offset      the position in the read to calculate the context for
     * @param contextSize context size to use building the context
-     * @return
+     * @return the bitSet representing the Context
     */
    private BitSet contextWith(byte[] bases, int offset, int contextSize) {
-        if (offset < contextSize)
+        BitSet result = NO_CONTEXT_BITSET;
-            return null;
+        if (offset >= contextSize) {
            String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
-        if (context.contains("N"))
+            if (!context.contains("N"))
-            return null;
+                result = BitSetUtils.bitSetFrom(context);
-        
+        }
-        return MathUtils.bitSetFrom(context);
+        return result;
    }
    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import java.util.BitSet;
 /*
 * Copyright (c) 2009 The Broad Institute
 *
@ -53,7 +55,29 @@ public interface Covariate {
     */
    public CovariateValues getValues(GATKSAMRecord read);
-    public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    /**
     * Used to get the covariate's value from input csv file during on-the-fly recalibration
     *
     * @param str the key in string type (read from the csv)
     * @return the key in it's correct type.
     */
    public Object getValue(String str);
    /**
     * Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
     *
     * @param key the bitset representation of the key
     * @return a string representation of the key
     */
    public String keyFromBitSet(BitSet key);
    /**
     * Each covariate should determine how many bits are necessary to encode it's data
     *
     * @return The number of bits used to represent the values of this covariate.
     */
    public int numberOfBits();
 }
 interface RequiredCovariate extends Covariate {}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.util.BitSet;
 import java.util.HashMap;
 /**
 * The object temporarily held by a read that describes all of it's covariates.
 *
@ -11,25 +14,40 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 * @since 2/8/12
 */
 public class CovariateKeySet {
-    private Object[][] mismatchesKeySet;
+    private BitSet[][] mismatchesKeySet;
-    private Object[][] insertionsKeySet;
+    private BitSet[][] insertionsKeySet;
-    private Object[][]  deletionsKeySet;
+    private BitSet[][] deletionsKeySet;
    private int nextCovariateIndex;
-    private static String mismatchesCovariateName = "M";
+    //    private static String mismatchesCovariateName = "M";
-    private static String insertionsCovariateName = "I";
+    //    private static String insertionsCovariateName = "I";
-    private static String  deletionsCovariateName = "D";
+    //    private static String deletionsCovariateName  = "D";
    //
    //    private static BitSet mismatchesCovariateBitSet = BitSetUtils.bitSetFrom(0);
    //    private static BitSet insertionsCovariateBitSet = BitSetUtils.bitSetFrom(1);
    //    private static BitSet deletionsCovariateBitSet = BitSetUtils.bitSetFrom(2);
    private static HashMap<String, RecalDataManager.BaseRecalibrationType> nameToType = new HashMap<String, RecalDataManager.BaseRecalibrationType>();
    private static HashMap<BitSet, String> bitSetToName = new HashMap<BitSet, String>();
    public CovariateKeySet(int readLength, int numberOfCovariates) {
-        numberOfCovariates++;                                               // +1 because we are adding the mismatch covariate (to comply with the molten table format)
+        //        numberOfCovariates++;                                               // +1 because we are adding the mismatch covariate (to comply with the molten table format)
-        this.mismatchesKeySet = new Object[readLength][numberOfCovariates]; 
+        this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
-        this.insertionsKeySet = new Object[readLength][numberOfCovariates];
+        this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
-        this.deletionsKeySet  = new Object[readLength][numberOfCovariates];
+        this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
-        initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
+        //        initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateBitSet);
-        initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
+        //        initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateBitSet);
-        initializeCovariateKeySet(this.deletionsKeySet,  deletionsCovariateName);
+        //        initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateBitSet);
        this.nextCovariateIndex = 0;
        //        nameToType.put(mismatchesCovariateName, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION);
        //        nameToType.put(insertionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
        //        nameToType.put(deletionsCovariateName,  RecalDataManager.BaseRecalibrationType.BASE_DELETION);
        //
        //        bitSetToName.put(BitSetUtils.bitSetFrom(0), mismatchesCovariateName);
        //        bitSetToName.put(BitSetUtils.bitSetFrom(1), insertionsCovariateName);
        //        bitSetToName.put(BitSetUtils.bitSetFrom(2), deletionsCovariateName);
    }
    public void addCovariate(CovariateValues covariate) {
@ -39,17 +57,19 @@ public class CovariateKeySet {
        nextCovariateIndex++;
    }
-    public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
+    public static RecalDataManager.BaseRecalibrationType errorModelFrom(final String modelString) {
-        if (modelString.equals(mismatchesCovariateName))
+        if (!nameToType.containsKey(modelString))
            return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
        else if (modelString.equals(insertionsCovariateName))
            return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
        else if (modelString.equals(deletionsCovariateName))
            return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
            throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
        return nameToType.get(modelString);
    }
-    public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
+    public static String eventNameFrom(final BitSet bitSet) {
        if (!bitSetToName.containsKey(bitSet))
            throw new ReviewedStingException("Unrecognized Event Type BitSet: " + bitSet);
        return bitSetToName.get(bitSet);
    }
    public BitSet[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
        switch (errorModel) {
            case BASE_SUBSTITUTION:
                return getMismatchesKeySet(readPosition);
@ -62,24 +82,24 @@ public class CovariateKeySet {
        }
    }
-    public Object[] getMismatchesKeySet(int readPosition) {
+    public BitSet[] getMismatchesKeySet(int readPosition) {
        return mismatchesKeySet[readPosition];
    }
-    public Object[] getInsertionsKeySet(int readPosition) {
+    public BitSet[] getInsertionsKeySet(int readPosition) {
        return insertionsKeySet[readPosition];
    }
-    public Object[] getDeletionsKeySet(int readPosition) {
+    public BitSet[] getDeletionsKeySet(int readPosition) {
        return deletionsKeySet[readPosition];
    }
-    private void transposeCovariateValues (Object [][] keySet, Object [] covariateValues) {
+    private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
        for (int i = 0; i < covariateValues.length; i++)
            keySet[i][nextCovariateIndex] = covariateValues[i];
    }
-    private void initializeCovariateKeySet (Object[][] keySet, String covariateName) {
+    private void initializeCovariateKeySet(BitSet[][] keySet, BitSet covariateName) {
        int readLength = keySet.length;
        int lastCovariateIndex = keySet[0].length - 1;
        for (int i = 0; i < readLength; i++)
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
@ -1,5 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import java.util.BitSet;
 /**
 * An object to hold the different covariate values for all bases in the read.
 *
@ -12,25 +14,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 * @since 2/8/12
 */
 public class CovariateValues {
-    private Object[] mismatches;
+    private BitSet[] mismatches;
-    private Object[] insertions;
+    private BitSet[] insertions;
-    private Object[] deletions;
+    private BitSet[] deletions;
-    public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
+    public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
        this.mismatches = mismatch;
        this.insertions = insertion;
        this.deletions = deletion;
    }
-    public Object[] getMismatches() {
+    public BitSet[] getMismatches() {
        return mismatches;
    }
-    public Object[] getInsertions() {
+    public BitSet[] getInsertions() {
        return insertions;
    }
-    public Object[] getDeletions() {
+    public BitSet[] getDeletions() {
        return deletions;
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
@ -1,10 +1,12 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.BitSetUtils;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import java.util.BitSet;
 import java.util.EnumSet;
 /*
@ -59,13 +61,13 @@ public class CycleCovariate implements StandardCovariate {
    // Used to pick out the covariate's value from attributes of the read
    @Override
    public CovariateValues getValues(final GATKSAMRecord read) {
-        Integer [] cycles = new Integer[read.getReadLength()];
+        BitSet[] cycles = new BitSet[read.getReadLength()];
        final NGSPlatform ngsPlatform = read.getNGSPlatform();
        // Discrete cycle platforms
        if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
-            final int init;
+            final short init;
-            final int increment;
+            final short increment;
            if (!read.getReadNegativeStrandFlag()) {
                // Differentiate between first and second of pair.
                // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
@ -88,19 +90,19 @@ public class CycleCovariate implements StandardCovariate {
            else {
                if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
                    //second of pair, negative strand
-                    init = -read.getReadLength();
+                    init = (short) -read.getReadLength();
                    increment = 1;
                }
                else {
                    //first of pair, negative strand
-                    init = read.getReadLength();
+                    init = (short) read.getReadLength();
                    increment = -1;
                }
            }
-            int cycle = init;
+            short cycle = init;
            for (int i = 0; i < read.getReadLength(); i++) {
-                cycles[i] = cycle;
+                cycles[i] = BitSetUtils.bitSetFrom(cycle);
                cycle += increment;
            }
        }
@ -119,7 +121,7 @@ public class CycleCovariate implements StandardCovariate {
            //   the current sequential model would consider the effects independently instead of jointly.
            final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
-            int cycle = multiplyByNegative1 ? -1 : 1;
+            short cycle = multiplyByNegative1 ? (short) -1 : 1;
            // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
            // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -127,19 +129,19 @@ public class CycleCovariate implements StandardCovariate {
                int iii = 0;
                while (iii < readLength) {
                    while (iii < readLength && bases[iii] == (byte) 'T') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii++;
                    }
                    while (iii < readLength && bases[iii] == (byte) 'A') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii++;
                    }
                    while (iii < readLength && bases[iii] == (byte) 'C') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii++;
                    }
                    while (iii < readLength && bases[iii] == (byte) 'G') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii++;
                    }
                    if (iii < readLength) {
@ -149,7 +151,7 @@ public class CycleCovariate implements StandardCovariate {
                            cycle++;
                    }
                    if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii++;
                    }
@ -159,19 +161,19 @@ public class CycleCovariate implements StandardCovariate {
                int iii = readLength - 1;
                while (iii >= 0) {
                    while (iii >= 0 && bases[iii] == (byte) 'T') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii--;
                    }
                    while (iii >= 0 && bases[iii] == (byte) 'A') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii--;
                    }
                    while (iii >= 0 && bases[iii] == (byte) 'C') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii--;
                    }
                    while (iii >= 0 && bases[iii] == (byte) 'G') {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii--;
                    }
                    if (iii >= 0) {
@ -181,7 +183,7 @@ public class CycleCovariate implements StandardCovariate {
                            cycle++;
                    }
                    if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
-                        cycles[iii] = cycle;
+                        cycles[iii] = BitSetUtils.bitSetFrom(cycle);
                        iii--;
                    }
                }
@ -201,4 +203,14 @@ public class CycleCovariate implements StandardCovariate {
    public final Object getValue(final String str) {
        return Integer.parseInt(str);
    }
    @Override
    public String keyFromBitSet(BitSet key) {
        return String.format("%d", BitSetUtils.shortFrom(key));
    }
    @Override
    public int numberOfBits() {
        return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
@ -1,7 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.BitSetUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import java.util.BitSet;
 /*
 * Copyright (c) 2009 The Broad Institute
 *
@ -37,6 +40,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 public class QualityScoreCovariate implements RequiredCovariate {
    private final int MAX_QUAL = 50;
    // Initialize any member variables using the command-line arguments passed to the walkers
    @Override
    public void initialize(final RecalibrationArgumentCollection RAC) {
@ -46,18 +51,18 @@ public class QualityScoreCovariate implements RequiredCovariate {
    public CovariateValues getValues(final GATKSAMRecord read) {
        int readLength = read.getReadLength();
-        Integer [] mismatches = new Integer[readLength];
+        BitSet[] mismatches = new BitSet[readLength];
-        Integer [] insertions = new Integer[readLength];
+        BitSet[] insertions = new BitSet[readLength];
-        Integer []  deletions = new Integer[readLength];
+        BitSet[] deletions = new BitSet[readLength];
        byte[] baseQualities = read.getBaseQualities();
        byte[] baseInsertionQualities = read.getBaseInsertionQualities();
        byte[] baseDeletionQualities = read.getBaseDeletionQualities();
        for (int i = 0; i < baseQualities.length; i++) {
-            mismatches[i] = (int) baseQualities[i];
+            mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
-            insertions[i] = (int) baseInsertionQualities[i];
+            insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
-            deletions[i] = (int) baseDeletionQualities[i];
+            deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
        }
        return new CovariateValues(mismatches, insertions, deletions);
@ -68,4 +73,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
    public final Object getValue(final String str) {
        return Integer.parseInt(str);
    }
    @Override
    public String keyFromBitSet(BitSet key) {
        return String.format("%d", BitSetUtils.longFrom(key));
    }
    @Override
    public int numberOfBits() {
        return BitSetUtils.numberOfBitsToRepresent(MAX_QUAL);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
@ -1,8 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.utils.BitSetUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.HashMap;
 /*
@ -62,8 +64,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
            readGroupReverseLookupTable.put(nextId, readGroupId);
            nextId++;
        }
-        Short [] readGroups = new Short[l];
+        BitSet rg = BitSetUtils.bitSetFrom(shortId);  // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
-        Arrays.fill(readGroups, shortId);
+        BitSet[] readGroups = new BitSet[l];
        Arrays.fill(readGroups, rg);
        return new CovariateValues(readGroups, readGroups, readGroups);
    }
@ -73,9 +76,19 @@ public class ReadGroupCovariate implements RequiredCovariate {
        return str;
    }
    @Override
    public String keyFromBitSet(BitSet key) {
        return decodeReadGroup((short) BitSetUtils.longFrom(key));
    }
    public final String decodeReadGroup(final short id) {
        return readGroupReverseLookupTable.get(id);
    }
    @Override
    public int numberOfBits() {
        return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@ -67,9 +67,35 @@ public class RecalDataManager {
    private static final String COVARS_ATTRIBUTE = "COVARS";                                            // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
    public enum BaseRecalibrationType {
-        BASE_SUBSTITUTION,
+        BASE_SUBSTITUTION(0, "M"),
-        BASE_INSERTION,
+        BASE_INSERTION(1, "I"),
-        BASE_DELETION
+        BASE_DELETION(2, "D");
        public int index;
        public String representation;
        private BaseRecalibrationType(int index, String representation) {
            this.index = index;
            this.representation = representation;
        }
        public static BaseRecalibrationType eventFrom(int index) {
            switch (index) {
                case 0:
                    return BASE_SUBSTITUTION;
                case 1:
                    return BASE_INSERTION;
                case 2:
                    return BASE_DELETION;
                default:
                    throw new ReviewedStingException(String.format("Event %d does not exist.", index));
            }
        }
        @Override
        public String toString() {
            return representation;
        }
    }
    public enum SOLID_RECAL_MODE {
@ -136,7 +162,7 @@ public class RecalDataManager {
        }
    }
-    public static CovariateKeySet getAllCovariateValuesFor(GATKSAMRecord read) {
+    public static CovariateKeySet covariateKeySetFrom(GATKSAMRecord read) {
        return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
    }
@ -551,6 +577,7 @@ public class RecalDataManager {
    /**
     * Given the base and the color calculate the next base in the sequence
     *
     * @param read     the read
     * @param prevBase The base
     * @param color    The color
     * @return The next base in the sequence
@ -615,11 +642,12 @@ public class RecalDataManager {
     * Computes all requested covariates for every offset in the given read
     * by calling covariate.getValues(..).
     *
-     * @param read                The read for which to compute covariate values.
+     * It populates an array of covariate values where result[i][j] is the covariate
     * @param requestedCovariates The list of requested covariates.
     * @return An array of covariate values where result[i][j] is the covariate
     * value for the ith position in the read and the jth covariate in
     * reqeustedCovariates list.
     *
     * @param read                The read for which to compute covariate values.
     * @param requestedCovariates The list of requested covariates.
     */
    public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
        final int numRequestedCovariates = requestedCovariates.size();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
@ -108,7 +108,8 @@ public class RecalDatumOptimized {
        return empiricalQualByte(0);    // 'default' behavior is to use smoothing value of zero
    }
-    public final String outputToCSV() {
+    @Override
    public final String toString() {
        return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@ -92,16 +91,6 @@ public class RecalibrationArgumentCollection {
    @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
    protected boolean RUN_WITHOUT_DBSNP = false;
    /////////////////////////////
    // protected Member Variables
    /////////////////////////////
    protected final RecalDataManager dataManager = new RecalDataManager();                // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
    protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
    protected final String SKIP_RECORD_ATTRIBUTE = "SKIP";                                // used to label reads that should be skipped.
    protected final String SEEN_ATTRIBUTE = "SEEN";                                       // used to label reads as processed.
    /**
     * CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
     * reads which have had the reference inserted because of color space inconsistencies.
@ -153,7 +142,6 @@ public class RecalibrationArgumentCollection {
    @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
    public byte DELETIONS_DEFAULT_QUALITY = 45;
    @Hidden
    @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
    public String DEFAULT_PLATFORM = null;
@ -161,5 +149,4 @@ public class RecalibrationArgumentCollection {
    @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
    public String FORCE_PLATFORM = null;
 }
--- a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
@ -0,0 +1,284 @@
 package org.broadinstitute.sting.utils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.io.ByteArrayOutputStream;
 import java.io.ObjectOutputStream;
 import java.util.BitSet;
 /**
 * Utilities for bitset conversion
 *
 * @author Mauricio Carneiro
 * @since 3/5/12
 */
 public class BitSetUtils {
    static final private int MAX_DNA_CONTEXT = 31;                              // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
    static final private byte NBITS_LONG_REPRESENTATION = 64;                   // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
    static final private byte NBITS_SHORT_REPRESENTATION = 16;                  // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)
    static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1];  // keeps the memoized table with the number of combinations for each given DNA context length
    /**
     * Creates an long out of a bitset
     *
     * @param bitSet the bitset
     * @return a long from the bitset representation
     */
    public static long longFrom(final BitSet bitSet) {
        return longFrom(bitSet, NBITS_LONG_REPRESENTATION);
    }
    /**
     * Creates a short integer from a bitset
     *
     * @param bitSet the bitset
     * @return a short from the bitset representation
     */
    public static short shortFrom(final BitSet bitSet) {
        return (short) longFrom(bitSet, NBITS_SHORT_REPRESENTATION);
    }
    /**
     * Cretes an integer with any number of bits (up to 64 -- long precision) from a bitset
     *
     * @param bitSet the bitset
     * @param nBits  the number of bits to be used for this representation
     * @return an integer with nBits from the bitset representation
     */
    public static long longFrom(final BitSet bitSet, final int nBits) {
        long number = 0;
        for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0 && bitIndex <= nBits; bitIndex = bitSet.nextSetBit(bitIndex + 1))
            number |= 1L << bitIndex;
        return number;
    }
    /**
     * Creates a BitSet representation of a given long
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the long
     */
    public static BitSet bitSetFrom(long number) {
        return bitSetFrom(number, NBITS_LONG_REPRESENTATION);
    }
    /**
     * Creates a BitSet representation of a given short
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the short
     */
    public static BitSet bitSetFrom(short number) {
        return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
    }
    /**
     * Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
     *
     * @param number the number to turn into a bitset
     * @param nBits  the number of bits to use as precision for this conversion
     * @return a bitset representation of the integer
     */
    public static BitSet bitSetFrom(long number, int nBits) {
        BitSet bitSet = new BitSet();
        boolean isNegative = number < 0;
        int bitIndex = 0;
        while (number != 0) {
            if (number % 2 != 0)
                bitSet.set(bitIndex);
            bitIndex++;
            number /= 2;
        }
        if (isNegative) {
            boolean foundFirstSetBit = false;
            for (int i = bitSet.nextSetBit(0); i < nBits && i >= 0; i++) {
                boolean bit = bitSet.get(i);
                if (!foundFirstSetBit && bit)
                    foundFirstSetBit = true;    // maintain all bits until the first 1 is found (inclusive)
                else if (foundFirstSetBit)
                    bitSet.flip(i);             // flip every other bit up to NBITS_REPRESENTATION
            }
        }
        return bitSet;
    }
    /**
     * Converts a BitSet into the dna string representation.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
     * base_10 representation of the sequence. This is important for us to know how to bring the number
     * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
     * as 0's and leading 0's are omitted).
     *
     * quasi-canonical because A is represented by a 0, therefore,
     * instead of : 0, 1, 2, 3, 10, 11, 12, ...
     * we have    : 0, 1, 2, 3, 00, 01, 02, ...
     *
     * but we can correctly decode it because we know the final length.
     *
     * @param bitSet the bitset representation of the dna sequence
     * @return the dna sequence represented by the bitset
     */
    public static String dnaFrom(final BitSet bitSet) {
        long number = longFrom(bitSet);         // the base_10 representation of the bit set
        if (number < 0)
            throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
        int length = contextLengthFor(number);  // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
        number -= combinationsFor(length - 1);  // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
        String dna = "";
        while (number > 0) {                    // perform a simple base_10 to base_4 conversion (quasi-canonical)
            byte base = (byte) (number % 4);
            switch (base) {
                case 0:
                    dna = "A" + dna;
                    break;
                case 1:
                    dna = "C" + dna;
                    break;
                case 2:
                    dna = "G" + dna;
                    break;
                case 3:
                    dna = "T" + dna;
                    break;
            }
            number /= 4;
        }
        for (int j = dna.length(); j < length; j++)
            dna = "A" + dna;                    // add leading A's as necessary (due to the "quasi" canonical status, see description above)
        return dna;
    }
    /**
     * Creates a BitSet representation of a given dna string.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * The bit representation of a dna string is the simple:
     * 0 A      4 AA     8 CA
     * 1 C      5 AC     ...
     * 2 G      6 AG     1343 TTGGT
     * 3 T      7 AT     1364 TTTTT
     *
     * To convert from dna to number, we convert the dna string to base10 and add all combinations that
     * preceded the string (with smaller lengths).
     *
     * @param dna the dna sequence
     * @return the bitset representing the dna sequence
     */
    public static BitSet bitSetFrom(String dna) {
        if (dna.length() > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));
        long baseTen = 0;                                       // the number in base_10 that we are going to use to generate the bit set
        long preContext = combinationsFor(dna.length() - 1);    // the sum of all combinations that preceded the length of the dna string
        for (int i = 0; i < dna.length(); i++) {
            baseTen *= 4;
            switch (dna.charAt(i)) {
                case 'A':
                    baseTen += 0;
                    break;
                case 'C':
                    baseTen += 1;
                    break;
                case 'G':
                    baseTen += 2;
                    break;
                case 'T':
                    baseTen += 3;
                    break;
            }
        }
        return bitSetFrom(baseTen + preContext);                // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
    }
    /**
     * Calculates the number of bits necessary to represent a given number of elements
     *
     * @param numberOfElements the number of elements to represent (must be positive)
     * @return the number of bits necessary to represent this many elements
     */
    public static int numberOfBitsToRepresent(long numberOfElements) {
        if (numberOfElements < 0)
            throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
        if (numberOfElements == 1L)
            return 1;   // special case
        int n = 0;
        numberOfElements--;
        while (numberOfElements > 0) {
            numberOfElements = numberOfElements >> 1;
            n++;
        }
        return n;
    }
    /**
     * Calculates the length of the DNA context for a given base 10 number
     *
     * It is important to know the length given the base 10 number to calculate the number of combinations
     * and to disambiguate the "quasi-canonical" state.
     *
     * This method also calculates the number of combinations as a by-product, but since it memoizes the
     * results, a subsequent call to combinationsFor(length) is O(1).
     *
     * @param number the base 10 representation of the bitset
     * @return the length of the DNA context represented by this number
     */
    private static int contextLengthFor(long number) {
        int length = 1;                              // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
        long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
        while (combinations <= number) {             // find the length of the dna string (length)
            length++;
            combinations = combinationsFor(length);  // calculate the next context
        }
        return length;
    }
    /**
     * The sum of all combinations of a context of a given length from length = 0 to length.
     *
     * Memoized implementation of sum(4^i) , where i=[0,length]
     *
     * @param length the length of the DNA context
     * @return the sum of all combinations leading up to this context length.
     */
    private static long combinationsFor(int length) {
        if (length > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
        // only calculate the number of combinations if the table hasn't already cached the value
        if (length > 0 && combinationsPerLength[length] == 0) {
            long combinations = 0L;
            for (int i = 1; i <= length; i++)
                combinations += (1L << 2 * i);        // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
            combinationsPerLength[length] = combinations;
        }
        return combinationsPerLength[length];
    }
    public static byte[] sizeOf(Object obj) throws java.io.IOException
    {
        ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
        objectOutputStream.writeObject(obj);
        objectOutputStream.flush();
        objectOutputStream.close();
        byteObject.close();
        return byteObject.toByteArray();
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@ -29,7 +29,6 @@ import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import java.math.BigDecimal;
@ -1527,124 +1526,4 @@ public class MathUtils {
    }
    /**
     * Creates an integer out of a bitset
     *
     * @param bitSet the bitset
     * @return an integer with the bitset representation
     */
    public static long intFrom(final BitSet bitSet) {
        long number = 0;
        for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
            number |= 1L << bitIndex;
        return number;
    }
    /**
     * Creates a BitSet representation of a given integer
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the integer
     */
    public static BitSet bitSetFrom(long number) {
        BitSet bitSet = new BitSet();
        int bitIndex = 0;
        while (number > 0) {
            if (number%2 > 0)
                bitSet.set(bitIndex);
            bitIndex++;
            number /= 2;
        }
        return bitSet;
    }
    /**
     * Converts a BitSet into the dna string representation.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
     * base_10 representation of the sequence. This is important for us to know how to bring the number
     * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
     * as 0's and leading 0's are omitted).
     *
     * quasi-canonical because A is represented by a 0, therefore,
     *  instead of : 0, 1, 2, 3, 10, 11, 12, ...
     *  we have    : 0, 1, 2, 3, 00, 01, 02, ...
     *
     * but we can correctly decode it because we know the final length.
     *
     * @param bitSet the bitset representation of the dna sequence
     * @return the dna sequence represented by the bitset
     */
    public static String dnaFrom(final BitSet bitSet) {
        long number = intFrom(bitSet);      // the base_10 representation of the bit set
        long preContext = 0;                // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
        long nextContext = 4;               // the next context (we advance it so we know which one was preceding it).
        int i = 1;                          // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
        while (nextContext <= number) {     // find the length of the dna string (i)
            preContext = nextContext;       // keep track of the number of combinations in the preceding context
            nextContext += Math.pow(4, ++i);// calculate the next context
        }
        number -= preContext;               // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
        String dna = "";
        while (number > 0) {                // perform a simple base_10 to base_4 conversion (quasi-canonical)
            byte base = (byte) (number % 4);
            switch (base) {
                case 0 : dna = "A" + dna; break;
                case 1 : dna = "C" + dna; break;
                case 2 : dna = "G" + dna; break;
                case 3 : dna = "T" + dna; break;
            }
            number /= 4;
        }
        for (int j = dna.length(); j < i; j++)
            dna = "A" + dna;                // add leading A's as necessary (due to the "quasi" canonical status, see description above)
        return dna;
    }
    /**
     * Creates a BitSet representation of a given dna string.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * The bit representation of a dna string is the simple:
     * 0 A      4 AA     8 CA
     * 1 C      5 AC     ...
     * 2 G      6 AG     1343 TTGGT
     * 3 T      7 AT     1364 TTTTT
     *
     * To convert from dna to number, we convert the dna string to base10 and add all combinations that
     * preceded the string (with smaller lengths). 
     * 
     * @param dna the dna sequence
     * @return the bitset representing the dna sequence 
     */
    public static BitSet bitSetFrom(String dna) {
        if (dna.length() > 31)
            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
        long baseTen = 0;                       // the number in base_10 that we are going to use to generate the bit set 
        long preContext = 0;                    // the sum of all combinations that preceded the length of the dna string
        for (int i=0; i<dna.length(); i++) {
            baseTen *= 4;
            switch(dna.charAt(i)) {
                case 'A': baseTen += 0; break;
                case 'C': baseTen += 1; break;
                case 'G': baseTen += 2; break;
                case 'T': baseTen += 3; break;
            }
            if (i>0)
                preContext += Math.pow(4, i);   // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...) 
        }
        return bitSetFrom(baseTen+preContext);  // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@ -72,14 +72,16 @@ public class BaseRecalibration {
                lineNumber++;
                if (EOF_MARKER.equals(line)) {
                    sawEOF = true;
-                } else if( COMMENT_PATTERN.matcher(line).matches() )  {
+                }
                else if (COMMENT_PATTERN.matcher(line).matches()) {
                    ; // Skip over the comment lines, (which start with '#')
                }
                // Read in the covariates that were used from the input file
                else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
                    if (foundAllCovariates) {
                        throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
-                    } else { // Found the covariate list in input file, loop through all of them and instantiate them
+                    }
                    else { // Found the covariate list in input file, loop through all of them and instantiate them
                        String[] vals = line.split(",");
                        for (int iii = 0; iii < vals.length - 4; iii++) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
                            boolean foundClass = false;
@ -102,7 +104,8 @@ public class BaseRecalibration {
                        }
                    }
-                } else { // Found a line of data
+                }
                else { // Found a line of data
                    if (!foundAllCovariates) {
                        foundAllCovariates = true;
@ -146,6 +149,7 @@ public class BaseRecalibration {
    /**
     * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
     *
     * @param line A line of CSV data read from the recalibration table data file
     */
    private void addCSVData(final File file, final String line) {
@ -165,7 +169,7 @@ public class BaseRecalibration {
            key[iii] = cov.getValue(vals[iii]);
        }
        final String modelString = vals[iii++];
-        final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
+        final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.errorModelFrom(modelString);
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
        final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
@ -178,7 +182,7 @@ public class BaseRecalibration {
        //compute all covariate values for this read
        RecalDataManager.computeCovariates(read, requestedCovariates);
-        final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
+        final CovariateKeySet covariateKeySet = RecalDataManager.covariateKeySetFrom(read);
        for (final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values()) {
            final byte[] originalQuals = read.getBaseQualities(errorModel);
@ -217,6 +221,7 @@ public class BaseRecalibration {
     * - The final shift equation is:
     *
     * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
     *
     * @param key The list of Comparables that were calculated from the covariates
     * @return A recalibrated quality score as a byte
     */
@ -267,6 +272,7 @@ public class BaseRecalibration {
    /**
     * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
     *
     * @param originalQuals The list of original base quality scores
     * @param recalQuals    A list of the new recalibrated quality scores
     */
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
@ -622,7 +623,6 @@ public class ReadUtils {
        HashMap<Integer, HashSet<GATKSAMRecord>> locusToReadMap = new HashMap<Integer, HashSet<GATKSAMRecord>>(2*(stopLocation - startLocation + 1), 0.5f);
        HashMap<GATKSAMRecord, Boolean[]> readToLocusMap = new HashMap<GATKSAMRecord, Boolean[]>(2*readList.size(), 0.5f);
        for (int i = startLocation; i <= stopLocation; i++)
            locusToReadMap.put(i, new HashSet<GATKSAMRecord>()); // Initialize the locusToRead map with empty lists
@ -649,6 +649,55 @@ public class ReadUtils {
        return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
    }
    /**
     * Create random read qualities
     *
     * @param length the length of the read
     * @return an array with randomized base qualities between 0 and 50
     */
    public static byte[] createRandomReadQuals(int length) {
        Random random = GenomeAnalysisEngine.getRandomGenerator();
        byte[] quals = new byte[length];
        for (int i = 0; i < length; i++)
            quals[i] = (byte) random.nextInt(50);
        return quals;
    }
    /**
     * Create random read qualities
     *
     * @param length  the length of the read
     * @param allowNs whether or not to allow N's in the read
     * @return an array with randomized bases (A-N) with equal probability
     */
    public static byte[] createRandomReadBases(int length, boolean allowNs) {
        Random random = GenomeAnalysisEngine.getRandomGenerator();
        int numberOfBases = allowNs ? 5 : 4;
        byte[] bases = new byte[length];
        for (int i = 0; i < length; i++) {
            switch (random.nextInt(numberOfBases)) {
                case 0:
                    bases[i] = 'A';
                    break;
                case 1:
                    bases[i] = 'C';
                    break;
                case 2:
                    bases[i] = 'G';
                    break;
                case 3:
                    bases[i] = 'T';
                    break;
                case 4:
                    bases[i] = 'N';
                    break;
                default:
                    throw new ReviewedStingException("Something went wrong, this is just impossible");
            }
        }
        return bases;
    }
    public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
        String[] sequenceRecordNames = new String[sequenceDictionary.size()];
        int sequenceRecordIndex = 0;
@ -656,4 +705,5 @@ public class ReadUtils {
            sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
        return Arrays.deepToString(sequenceRecordNames);
    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java
@ -1,9 +1,9 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@ -12,30 +12,6 @@ import java.util.BitSet;
 import java.util.Random;
 /**
 * Short one line description of the walker.
 *
 * <p>
 * [Long description of the walker]
 * </p>
 *
 *
 * <h2>Input</h2>
 * <p>
 * [Description of the Input]
 * </p>
 *
 * <h2>Output</h2>
 * <p>
 * [Description of the Output]
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 *    java
 *      -jar GenomeAnalysisTK.jar
 *      -T [walker name]
 *  </pre>
 *
 * @author Mauricio Carneiro
 * @since 3/1/12
 */
@ -55,22 +31,27 @@ public class ContextCovariateUnitTest {
    @Test(enabled = true)
    public void testSimpleContexts() {
-        byte [] quals = createRandomReadQuals(101);
+        byte[] quals = ReadUtils.createRandomReadQuals(10000);
-        byte [] bbases = createRandomReadBases(101);
+        byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
        String bases = stringFrom(bbases);
        //        System.out.println("Read: " + bases);
        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
        CovariateValues values = covariate.getValues(read);
-        verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
+        verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
-        verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
+        verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
-        verifyCovariateArray((BitSet []) values.getDeletions(),  RAC.DELETIONS_CONTEXT_SIZE, bases);
+        verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
    }
    private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
        for (int i = 0; i < values.length; i++) {
-            if (i >= contextSize)
+            String expectedContext = covariate.NO_CONTEXT_VALUE;
-                Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
+            if (i >= contextSize) {
-            else
+                String context = bases.substring(i - contextSize, i);
-                Assert.assertNull(values[i]);
+                if (!context.contains("N"))
                    expectedContext = context;
            }
            //            System.out.println(String.format("Context [%d]:\n%s\n%s\n", i, covariate.keyFromBitSet(values[i]), expectedContext));
            Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
        }
    }
@ -81,23 +62,4 @@ public class ContextCovariateUnitTest {
        return s;
    }
    private byte [] createRandomReadQuals(int length) {
        byte [] quals = new byte[length];
        for (int i=0; i<length; i++)
            quals[i] = (byte) random.nextInt(50);
        return quals;
    }
    private byte [] createRandomReadBases(int length) {
        byte [] bases = new byte[length];
        for (int i=0; i<length; i++) {
            switch(random.nextInt(4)) {
                case 0: bases[i] = 'A'; break;
                case 1: bases[i] = 'C'; break;
                case 2: bases[i] = 'G'; break;
                case 3: bases[i] = 'T'; break;
            }
        }
        return bases;
    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java
@ -0,0 +1,68 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 import java.util.BitSet;
 import java.util.Random;
 /**
 * @author Mauricio Carneiro
 * @since 3/1/12
 */
 public class CycleCovariateUnitTest {
    CycleCovariate covariate;
    RecalibrationArgumentCollection RAC;
    Random random;
    @BeforeClass
    public void init() {
        RAC = new RecalibrationArgumentCollection();
        covariate = new CycleCovariate();
        random = GenomeAnalysisEngine.getRandomGenerator();
        covariate.initialize(RAC);
    }
    @Test(enabled = true)
    public void testSimpleCycles() {
        short readLength = 10;
        byte[] quals = ReadUtils.createRandomReadQuals(readLength);
        byte[] bbases = ReadUtils.createRandomReadBases(readLength, true);
        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
        read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
        read.getReadGroup().setPlatform("illumina");
        CovariateValues values = covariate.getValues(read);
        verifyCovariateArray(values.getMismatches(), (short) 1, (short) 1);
        read.setReadNegativeStrandFlag(true);
        values = covariate.getValues(read);
        verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
        read.setReadPairedFlag(true);
        read.setSecondOfPairFlag(true);
        values = covariate.getValues(read);
        verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
        read.setReadNegativeStrandFlag(false);
        values = covariate.getValues(read);
        verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
    }
    private void verifyCovariateArray(BitSet[] values, short init, short increment) {
        for (short i = 0; i < values.length; i++) {
            short actual = Short.decode(covariate.keyFromBitSet(values[i]));
            int expected = init + (increment * i);
            //            System.out.println(String.format("%d: %d, %d", i, actual, expected));
            Assert.assertEquals(actual, expected);
        }
    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java
@ -0,0 +1,75 @@
 package org.broadinstitute.sting.utils;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 import java.util.Random;
 /**
 * @author Mauricio Carneiro
 * @since 3/5/12
 */
 public class BitSetUtilsUnitTest {
    private static int RANDOM_NUMBERS_TO_TRY = 87380;
    private static Random random;
    @BeforeClass
    public void init() {
        random = GenomeAnalysisEngine.getRandomGenerator();
    }
    @Test(enabled = true)
    public void testLongBitSet() {
        long[] numbers = {0L, 1L, 428L, 65536L, 239847L, 4611686018427387903L, Long.MAX_VALUE, Long.MIN_VALUE, -1L, -2L, -7L, -128L, -65536L, -100000L};
        for (long n : numbers)
            Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n);
        for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
            long n = random.nextLong();
            Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n);    // Because class Random uses a seed with only 48 bits, this algorithm will not return all possible long values.
        }
    }
    @Test(enabled = true)
    public void testShortBitSet() {
        short[] numbers = {0, 1, 428, 25934, 23847, 16168, Short.MAX_VALUE, Short.MIN_VALUE, -1, -2, -7, -128, -12312, -31432};
        for (long n : numbers)
            Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
        for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
            short n = (short) random.nextInt();
            Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
        }
    }
    @Test(enabled = true)
    public void testDNAAndBitSetConversion() {
        String[] dna = {"AGGTGTTGT", "CCCCCCCCCCCCCC", "GGGGGGGGGGGGGG", "TTTTTTTTTTTTTT", "GTAGACCGATCTCAGCTAGT", "AACGTCAATGCAGTCAAGTCAGACGTGGGTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"};
        // Test all contexts of size 1-8.
        for (long n = 0; n < RANDOM_NUMBERS_TO_TRY; n++)
            Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(n)))), n);
        // Test the special cases listed in the dna array
        for (String d : dna)
            Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
    }
    @Test(enabled = true)
    public void testNumberOfBitsToRepresent() {
        Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented 
        Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
        Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented
        for (int i = 1; i < 63; i++) {                                  // Can't test i == 63 because n1 is a negative number
            long n1 = 1L << i;
            long n2 = Math.abs(random.nextLong()) % n1;
            long n3 = n1 | n2;
            Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
            Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n1), i);
        }
    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java
@ -25,7 +25,6 @@
 package org.broadinstitute.sting.utils;
 import org.broadinstitute.sting.BaseTest;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
@ -131,7 +130,8 @@ public class MathUtilsUnitTest extends BaseTest {
        int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
        MathUtils.RunningAverage r = new MathUtils.RunningAverage();
-        for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);
+        for (int i = 0; i < numbers.length; i++)
            r.add((double) numbers[i]);
        Assert.assertEquals((long) numbers.length, r.observationCount());
        Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
@ -223,35 +223,6 @@ public class MathUtilsUnitTest extends BaseTest {
        return set.isEmpty();
    }
    @Test(enabled = true)
    public void testIntAndBitSetConversion() {
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)),       428);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)),    239847);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)),     12726);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)),         0);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)),         1);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)),     65536);
        Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
    }
    @Test(enabled = true)
    public void testDNAAndBitSetConversion() {
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")),                    "ACGT");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")),               "AGGTGTTGT");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")),                       "A");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")),                       "C");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")),                       "G");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")),                       "T");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")),                      "CC");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")),                      "AA");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")),                    "AAAA");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")),          "CCCCCCCCCCCCCC");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")),          "GGGGGGGGGGGGGG");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")),          "TTTTTTTTTTTTTT");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")),    "GTAGACCGATCTCAGCTAGT");
        Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")),    "AACGTCAATGCAGTCAAGTCAGACGTGGGTT");  // testing max precision (length == 31)
    }
    @Test
    public void testApproximateLog10SumLog10() {
        Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);