BitSet keys to lower BQSR's memory footprint

Infrastructure:
	* Generic BitSet implementation with any precision (up to long)
	* Two's complement implementation of the bit set handles negative numbers (cycle covariate)
	* Memoized implementation of the BitSet utils for better performance.
	* All exponents are now calculated with bit shifts, fixing numerical precision issues with the double Math.pow.
	* Replace log/sqrt with bitwise logic to get rid of numerical issues

 BQSR:
	* All covariates output BitSets and have the functionality to decode them back into Object values.
	* Covariates are responsible for determining the size of the key they will use (number of bits).
	* Generalized KeyManager implementation combines any arbitrary number of covariates into one bitset key with event type
	* No more NestedHashMaps. Single key system now fits in one hash to reduce hash table objects overhead

 Tests:
	* Unit tests added to every method of BitSetUtils
	* Unit tests added to the generalized key system infrastructure of BQSRv2 (KeyManager)
	* Unit tests added to the cycle and context covariates (will add unit tests to all covariates)
This commit is contained in:
Mauricio Carneiro 2012-03-05 17:37:34 -05:00
parent e86ce8f3d6
commit ca11ab39e7
19 changed files with 868 additions and 457 deletions

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -43,7 +43,10 @@ public class ContextCovariate implements StandardCovariate {
private int mismatchesContextSize;
private int insertionsContextSize;
private int deletionsContextSize;
private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
@ -62,7 +65,7 @@ public class ContextCovariate implements StandardCovariate {
int l = read.getReadLength();
BitSet[] mismatches = new BitSet[l];
BitSet[] insertions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases();
@ -72,7 +75,7 @@ public class ContextCovariate implements StandardCovariate {
for (int i = 0; i < read.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
insertions[i] = contextWith(bases, i, insertionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize);
}
if (negativeStrand) {
@ -89,24 +92,35 @@ public class ContextCovariate implements StandardCovariate {
return str;
}
@Override
public String keyFromBitSet(BitSet key) {
if (key.equals(NO_CONTEXT_BITSET))
return NO_CONTEXT_VALUE;
return BitSetUtils.dnaFrom(key);
}
@Override
public int numberOfBits() {
return Long.bitCount(-1L);
}
/**
* calculates the context of a base independent of the covariate mode
* calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
*
* @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context
* @return
* @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context
* @return the bitSet representing the Context
*/
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
if (offset < contextSize)
return null;
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (context.contains("N"))
return null;
return MathUtils.bitSetFrom(context);
}
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
BitSet result = NO_CONTEXT_BITSET;
if (offset >= contextSize) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (!context.contains("N"))
result = BitSetUtils.bitSetFrom(context);
}
return result;
}
/**
* Reverses the given array in place.

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -53,7 +55,29 @@ public interface Covariate {
*/
public CovariateValues getValues(GATKSAMRecord read);
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
/**
* Used to get the covariate's value from input csv file during on-the-fly recalibration
*
* @param str the key in string type (read from the csv)
* @return the key in its correct type.
*/
public Object getValue(String str);
/**
* Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
*
* @param key the bitset representation of the key
* @return a string representation of the key
*/
public String keyFromBitSet(BitSet key);
/**
* Each covariate should determine how many bits are necessary to encode its data
*
* @return The number of bits used to represent the values of this covariate.
*/
public int numberOfBits();
}
interface RequiredCovariate extends Covariate {}

View File

@ -2,87 +2,107 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.BitSet;
import java.util.HashMap;
/**
* The object temporarily held by a read that describes all of it's covariates.
*
* The object temporarily held by a read that describes all of its covariates.
*
* In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
*
* @author Mauricio Carneiro
* @since 2/8/12
*/
public class CovariateKeySet {
private Object[][] mismatchesKeySet;
private Object[][] insertionsKeySet;
private Object[][] deletionsKeySet;
private BitSet[][] mismatchesKeySet;
private BitSet[][] insertionsKeySet;
private BitSet[][] deletionsKeySet;
private int nextCovariateIndex;
private static String mismatchesCovariateName = "M";
private static String insertionsCovariateName = "I";
private static String deletionsCovariateName = "D";
// private static String mismatchesCovariateName = "M";
// private static String insertionsCovariateName = "I";
// private static String deletionsCovariateName = "D";
//
// private static BitSet mismatchesCovariateBitSet = BitSetUtils.bitSetFrom(0);
// private static BitSet insertionsCovariateBitSet = BitSetUtils.bitSetFrom(1);
// private static BitSet deletionsCovariateBitSet = BitSetUtils.bitSetFrom(2);
private static HashMap<String, RecalDataManager.BaseRecalibrationType> nameToType = new HashMap<String, RecalDataManager.BaseRecalibrationType>();
private static HashMap<BitSet, String> bitSetToName = new HashMap<BitSet, String>();
public CovariateKeySet(int readLength, int numberOfCovariates) {
numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
this.mismatchesKeySet = new Object[readLength][numberOfCovariates];
this.insertionsKeySet = new Object[readLength][numberOfCovariates];
this.deletionsKeySet = new Object[readLength][numberOfCovariates];
initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateName);
// numberOfCovariates++; // +1 because we are adding the mismatch covariate (to comply with the molten table format)
this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
// initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateBitSet);
// initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateBitSet);
// initializeCovariateKeySet(this.deletionsKeySet, deletionsCovariateBitSet);
this.nextCovariateIndex = 0;
// nameToType.put(mismatchesCovariateName, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION);
// nameToType.put(insertionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
// nameToType.put(deletionsCovariateName, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
//
// bitSetToName.put(BitSetUtils.bitSetFrom(0), mismatchesCovariateName);
// bitSetToName.put(BitSetUtils.bitSetFrom(1), insertionsCovariateName);
// bitSetToName.put(BitSetUtils.bitSetFrom(2), deletionsCovariateName);
}
public void addCovariate(CovariateValues covariate) {
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
nextCovariateIndex++;
}
public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
if (modelString.equals(mismatchesCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
else if (modelString.equals(insertionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
else if (modelString.equals(deletionsCovariateName))
return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
public static RecalDataManager.BaseRecalibrationType errorModelFrom(final String modelString) {
if (!nameToType.containsKey(modelString))
throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
return nameToType.get(modelString);
}
public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
public static String eventNameFrom(final BitSet bitSet) {
if (!bitSetToName.containsKey(bitSet))
throw new ReviewedStingException("Unrecognized Event Type BitSet: " + bitSet);
return bitSetToName.get(bitSet);
}
public BitSet[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
switch (errorModel) {
case BASE_SUBSTITUTION:
return getMismatchesKeySet(readPosition);
return getMismatchesKeySet(readPosition);
case BASE_INSERTION:
return getInsertionsKeySet(readPosition);
return getInsertionsKeySet(readPosition);
case BASE_DELETION:
return getDeletionsKeySet(readPosition);
return getDeletionsKeySet(readPosition);
default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
}
}
public Object[] getMismatchesKeySet(int readPosition) {
public BitSet[] getMismatchesKeySet(int readPosition) {
return mismatchesKeySet[readPosition];
}
public Object[] getInsertionsKeySet(int readPosition) {
public BitSet[] getInsertionsKeySet(int readPosition) {
return insertionsKeySet[readPosition];
}
public Object[] getDeletionsKeySet(int readPosition) {
public BitSet[] getDeletionsKeySet(int readPosition) {
return deletionsKeySet[readPosition];
}
private void transposeCovariateValues (Object [][] keySet, Object [] covariateValues) {
for (int i=0; i<covariateValues.length; i++)
keySet[i][nextCovariateIndex] = covariateValues[i];
private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
for (int i = 0; i < covariateValues.length; i++)
keySet[i][nextCovariateIndex] = covariateValues[i];
}
private void initializeCovariateKeySet (Object[][] keySet, String covariateName) {
private void initializeCovariateKeySet(BitSet[][] keySet, BitSet covariateName) {
int readLength = keySet.length;
int lastCovariateIndex = keySet[0].length - 1;
for (int i = 0; i < readLength; i++)
for (int i = 0; i < readLength; i++)
keySet[i][lastCovariateIndex] = covariateName;
}
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import java.util.BitSet;
/**
* An object to hold the different covariate values for all bases in the read.
*
@ -12,25 +14,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* @since 2/8/12
*/
public class CovariateValues {
private Object[] mismatches;
private Object[] insertions;
private Object[] deletions;
private BitSet[] mismatches;
private BitSet[] insertions;
private BitSet[] deletions;
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch;
this.insertions = insertion;
this.deletions = deletion;
}
public Object[] getMismatches() {
public BitSet[] getMismatches() {
return mismatches;
}
public Object[] getInsertions() {
public BitSet[] getInsertions() {
return insertions;
}
public Object[] getDeletions() {
public BitSet[] getDeletions() {
return deletions;
}

View File

@ -1,10 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
import java.util.EnumSet;
/*
@ -59,13 +61,13 @@ public class CycleCovariate implements StandardCovariate {
// Used to pick out the covariate's value from attributes of the read
@Override
public CovariateValues getValues(final GATKSAMRecord read) {
Integer [] cycles = new Integer[read.getReadLength()];
BitSet[] cycles = new BitSet[read.getReadLength()];
final NGSPlatform ngsPlatform = read.getNGSPlatform();
// Discrete cycle platforms
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
final int init;
final int increment;
final short init;
final short increment;
if (!read.getReadNegativeStrandFlag()) {
// Differentiate between first and second of pair.
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
@ -88,19 +90,19 @@ public class CycleCovariate implements StandardCovariate {
else {
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
//second of pair, negative strand
init = -read.getReadLength();
init = (short) -read.getReadLength();
increment = 1;
}
else {
//first of pair, negative strand
init = read.getReadLength();
init = (short) read.getReadLength();
increment = -1;
}
}
int cycle = init;
short cycle = init;
for (int i = 0; i < read.getReadLength(); i++) {
cycles[i] = cycle;
cycles[i] = BitSetUtils.bitSetFrom(cycle);
cycle += increment;
}
}
@ -119,7 +121,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
int cycle = multiplyByNegative1 ? -1 : 1;
short cycle = multiplyByNegative1 ? (short) -1 : 1;
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -127,19 +129,19 @@ public class CycleCovariate implements StandardCovariate {
int iii = 0;
while (iii < readLength) {
while (iii < readLength && bases[iii] == (byte) 'T') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'A') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'C') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
while (iii < readLength && bases[iii] == (byte) 'G') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
if (iii < readLength) {
@ -149,7 +151,7 @@ public class CycleCovariate implements StandardCovariate {
cycle++;
}
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii++;
}
@ -159,19 +161,19 @@ public class CycleCovariate implements StandardCovariate {
int iii = readLength - 1;
while (iii >= 0) {
while (iii >= 0 && bases[iii] == (byte) 'T') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'A') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'C') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
while (iii >= 0 && bases[iii] == (byte) 'G') {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
if (iii >= 0) {
@ -181,7 +183,7 @@ public class CycleCovariate implements StandardCovariate {
cycle++;
}
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
cycles[iii] = cycle;
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
iii--;
}
}
@ -192,7 +194,7 @@ public class CycleCovariate implements StandardCovariate {
else {
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
}
return new CovariateValues(cycles, cycles, cycles);
}
@ -201,4 +203,14 @@ public class CycleCovariate implements StandardCovariate {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
@Override
public String keyFromBitSet(BitSet key) {
return String.format("%d", BitSetUtils.shortFrom(key));
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
}
}

View File

@ -1,7 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -37,6 +40,8 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class QualityScoreCovariate implements RequiredCovariate {
private final int MAX_QUAL = 50;
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
@ -46,18 +51,18 @@ public class QualityScoreCovariate implements RequiredCovariate {
public CovariateValues getValues(final GATKSAMRecord read) {
int readLength = read.getReadLength();
Integer [] mismatches = new Integer[readLength];
Integer [] insertions = new Integer[readLength];
Integer [] deletions = new Integer[readLength];
BitSet[] mismatches = new BitSet[readLength];
BitSet[] insertions = new BitSet[readLength];
BitSet[] deletions = new BitSet[readLength];
byte [] baseQualities = read.getBaseQualities();
byte [] baseInsertionQualities = read.getBaseInsertionQualities();
byte [] baseDeletionQualities = read.getBaseDeletionQualities();
byte[] baseQualities = read.getBaseQualities();
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
for (int i=0; i<baseQualities.length; i++) {
mismatches[i] = (int) baseQualities[i];
insertions[i] = (int) baseInsertionQualities[i];
deletions[i] = (int) baseDeletionQualities[i];
for (int i = 0; i < baseQualities.length; i++) {
mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
}
return new CovariateValues(mismatches, insertions, deletions);
@ -68,4 +73,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
public final Object getValue(final String str) {
return Integer.parseInt(str);
}
@Override
public String keyFromBitSet(BitSet key) {
return String.format("%d", BitSetUtils.longFrom(key));
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(MAX_QUAL);
}
}

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
/*
@ -39,7 +41,7 @@ import java.util.HashMap;
*/
public class ReadGroupCovariate implements RequiredCovariate {
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
private short nextId = 0;
@ -54,7 +56,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
final int l = read.getReadLength();
final String readGroupId = read.getReadGroup().getReadGroupId();
short shortId;
if (readGroupLookupTable.containsKey(readGroupId))
if (readGroupLookupTable.containsKey(readGroupId))
shortId = readGroupLookupTable.get(readGroupId);
else {
shortId = nextId;
@ -62,8 +64,9 @@ public class ReadGroupCovariate implements RequiredCovariate {
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
}
Short [] readGroups = new Short[l];
Arrays.fill(readGroups, shortId);
BitSet rg = BitSetUtils.bitSetFrom(shortId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
BitSet[] readGroups = new BitSet[l];
Arrays.fill(readGroups, rg);
return new CovariateValues(readGroups, readGroups, readGroups);
}
@ -72,10 +75,20 @@ public class ReadGroupCovariate implements RequiredCovariate {
public final Object getValue(final String str) {
return str;
}
@Override
public String keyFromBitSet(BitSet key) {
return decodeReadGroup((short) BitSetUtils.longFrom(key));
}
public final String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
}
}

View File

@ -58,18 +58,44 @@ public class RecalDataManager {
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false;
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
public enum BaseRecalibrationType {
BASE_SUBSTITUTION,
BASE_INSERTION,
BASE_DELETION
BASE_SUBSTITUTION(0, "M"),
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
public int index;
public String representation;
private BaseRecalibrationType(int index, String representation) {
this.index = index;
this.representation = representation;
}
public static BaseRecalibrationType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
}
@Override
public String toString() {
return representation;
}
}
public enum SOLID_RECAL_MODE {
@ -119,7 +145,7 @@ public class RecalDataManager {
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
@ -136,10 +162,10 @@ public class RecalDataManager {
}
}
public static CovariateKeySet getAllCovariateValuesFor(GATKSAMRecord read) {
public static CovariateKeySet covariateKeySetFrom(GATKSAMRecord read) {
return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
}
/**
* Add the given mapping to all of the collapsed hash tables
*
@ -147,7 +173,7 @@ public class RecalDataManager {
* @param fullDatum The RecalDatum which is the data for this mapping
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
*/
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel) {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
@ -208,7 +234,7 @@ public class RecalDataManager {
*/
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
@ -551,6 +577,7 @@ public class RecalDataManager {
/**
* Given the base and the color calculate the next base in the sequence
*
* @param read the read
* @param prevBase The base
* @param color The color
* @return The next base in the sequence
@ -615,11 +642,12 @@ public class RecalDataManager {
* Computes all requested covariates for every offset in the given read
* by calling covariate.getValues(..).
*
* It populates an array of covariate values where result[i][j] is the covariate
* value for the ith position in the read and the jth covariate in
* requestedCovariates list.
*
* @param read The read for which to compute covariate values.
* @param requestedCovariates The list of requested covariates.
* @return An array of covariate values where result[i][j] is the covariate
* value for the ith position in the read and the jth covariate in
* requestedCovariates list.
*/
public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
final int numRequestedCovariates = requestedCovariates.size();

View File

@ -94,7 +94,7 @@ public class RecalDatumOptimized {
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, maxQual);
}
@ -106,9 +106,10 @@ public class RecalDatumOptimized {
public final byte empiricalQualByte() {
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
}
}
public final String outputToCSV() {
@Override
public final String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.recalibration.CountCovariatesGatherer;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -92,16 +91,6 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
protected boolean RUN_WITHOUT_DBSNP = false;
/////////////////////////////
// protected Member Variables
/////////////////////////////
protected final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap used to create collapsed data hashmaps (delta delta tables)
protected final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>();// A list to hold the covariate objects that were requested
protected final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
protected final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
/**
* CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
* reads which have had the reference inserted because of color space inconsistencies.
@ -153,7 +142,6 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
public byte DELETIONS_DEFAULT_QUALITY = 45;
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@ -161,5 +149,4 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
}

View File

@ -0,0 +1,284 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.util.BitSet;
/**
 * Utilities for bitset conversion.
 *
 * Converts between long/short integers (negative values included, via two's complement),
 * DNA context strings and their {@link java.util.BitSet} representations.
 *
 * @author Mauricio Carneiro
 * @since 3/5/12
 */
public class BitSetUtils {
    static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
    static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
    static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)

    // Memoized table with the cumulative number of combinations for each DNA context length.
    // NOTE(review): lazily filled, idempotent writes; assumed single-threaded use -- confirm if shared across threads.
    static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1];

    /**
     * Creates a long out of a bitset
     *
     * @param bitSet the bitset
     * @return a long from the bitset representation
     */
    public static long longFrom(final BitSet bitSet) {
        return longFrom(bitSet, NBITS_LONG_REPRESENTATION);
    }

    /**
     * Creates a short integer from a bitset
     *
     * @param bitSet the bitset
     * @return a short from the bitset representation
     */
    public static short shortFrom(final BitSet bitSet) {
        return (short) longFrom(bitSet, NBITS_SHORT_REPRESENTATION);
    }

    /**
     * Creates an integer with any number of bits (up to 64 -- long precision) from a bitset
     *
     * @param bitSet the bitset
     * @param nBits  the number of bits to be used for this representation
     * @return an integer with nBits from the bitset representation
     */
    public static long longFrom(final BitSet bitSet, final int nBits) {
        long number = 0;
        // bugfix: bit indices for an nBits-wide value run [0, nBits-1]; the previous condition
        // (bitIndex <= nBits) read one bit too many, and for nBits == 64 the out-of-range
        // shift (1L << 64) wraps around to bit 0, corrupting the result.
        for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0 && bitIndex < nBits; bitIndex = bitSet.nextSetBit(bitIndex + 1))
            number |= 1L << bitIndex;

        return number;
    }

    /**
     * Creates a BitSet representation of a given long
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the long
     */
    public static BitSet bitSetFrom(long number) {
        return bitSetFrom(number, NBITS_LONG_REPRESENTATION);
    }

    /**
     * Creates a BitSet representation of a given short
     *
     * @param number the number to turn into a bitset
     * @return a bitset representation of the short
     */
    public static BitSet bitSetFrom(short number) {
        return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
    }

    /**
     * Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
     *
     * Negative numbers are stored in two's complement form: the magnitude bits are collected
     * first, then every bit above the lowest set bit is flipped up to nBits.
     *
     * @param number the number to turn into a bitset
     * @param nBits  the number of bits to use as precision for this conversion
     * @return a bitset representation of the integer
     */
    public static BitSet bitSetFrom(long number, int nBits) {
        BitSet bitSet = new BitSet();
        boolean isNegative = number < 0;
        int bitIndex = 0;
        // collect the magnitude bits (for negative input, Java's % yields negative remainders,
        // so the != 0 test still flags odd values correctly)
        while (number != 0) {
            if (number % 2 != 0)
                bitSet.set(bitIndex);
            bitIndex++;
            number /= 2;
        }
        if (isNegative) {
            boolean foundFirstSetBit = false;
            for (int i = bitSet.nextSetBit(0); i < nBits && i >= 0; i++) {
                boolean bit = bitSet.get(i);
                if (!foundFirstSetBit && bit)
                    foundFirstSetBit = true; // maintain all bits until the first 1 is found (inclusive)
                else if (foundFirstSetBit)
                    bitSet.flip(i); // flip every other bit up to nBits (two's complement)
            }
        }
        return bitSet;
    }

    /**
     * Converts a BitSet into the dna string representation.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
     * base_10 representation of the sequence. This is important for us to know how to bring the number
     * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
     * as 0's and leading 0's are omitted).
     *
     * quasi-canonical because A is represented by a 0, therefore,
     * instead of : 0, 1, 2, 3, 10, 11, 12, ...
     * we have    : 0, 1, 2, 3, 00, 01, 02, ...
     *
     * but we can correctly decode it because we know the final length.
     *
     * @param bitSet the bitset representation of the dna sequence
     * @return the dna sequence represented by the bitset
     * @throws ReviewedStingException if the bitset decodes to a negative number (possible overflow)
     */
    public static String dnaFrom(final BitSet bitSet) {
        long number = longFrom(bitSet); // the base_10 representation of the bit set
        if (number < 0)
            throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");

        final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
        number -= combinationsFor(length - 1); // subtract the number of combinations of the preceding context from the number to get to the quasi-canonical representation

        final StringBuilder dna = new StringBuilder(length);
        while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
            final byte base = (byte) (number % 4);
            switch (base) {
                case 0:
                    dna.insert(0, 'A');
                    break;
                case 1:
                    dna.insert(0, 'C');
                    break;
                case 2:
                    dna.insert(0, 'G');
                    break;
                case 3:
                    dna.insert(0, 'T');
                    break;
            }
            number /= 4;
        }
        while (dna.length() < length)
            dna.insert(0, 'A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)

        return dna.toString();
    }

    /**
     * Creates a BitSet representation of a given dna string.
     *
     * Warning: This conversion is limited to long precision, therefore the dna sequence cannot
     * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
     * a bitSetFrom(BigNumber) method.
     *
     * The bit representation of a dna string is the simple:
     * 0 A     4 AA    8 CA
     * 1 C     5 AC    ...
     * 2 G     6 AG    1343 TTGGT
     * 3 T     7 AT    1364 TTTTT
     *
     * To convert from dna to number, we convert the dna string to base10 and add all combinations that
     * preceded the string (with smaller lengths).
     *
     * @param dna the dna sequence
     * @return the bitset representing the dna sequence
     * @throws ReviewedStingException if the sequence is longer than MAX_DNA_CONTEXT bases
     */
    public static BitSet bitSetFrom(String dna) {
        if (dna.length() > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));

        long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
        long preContext = combinationsFor(dna.length() - 1); // the sum of all combinations that preceded the length of the dna string
        for (int i = 0; i < dna.length(); i++) {
            baseTen *= 4;
            // NOTE: any character other than A/C/G/T is silently treated like 'A' (adds 0) -- preserved behavior
            switch (dna.charAt(i)) {
                case 'A':
                    baseTen += 0;
                    break;
                case 'C':
                    baseTen += 1;
                    break;
                case 'G':
                    baseTen += 2;
                    break;
                case 'T':
                    baseTen += 3;
                    break;
            }
        }
        return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
    }

    /**
     * Calculates the number of bits necessary to represent a given number of elements
     *
     * @param numberOfElements the number of elements to represent (must be positive)
     * @return the number of bits necessary to represent this many elements
     * @throws ReviewedStingException if numberOfElements is negative
     */
    public static int numberOfBitsToRepresent(long numberOfElements) {
        if (numberOfElements < 0)
            throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);

        if (numberOfElements == 1L)
            return 1; // special case: ceil(log2(1)) would be 0 bits, but one element still needs a bit

        int n = 0;
        numberOfElements--; // values range [0, numberOfElements-1], so represent numberOfElements-1
        while (numberOfElements > 0) {
            numberOfElements = numberOfElements >> 1;
            n++;
        }
        return n;
    }

    /**
     * Calculates the length of the DNA context for a given base 10 number
     *
     * It is important to know the length given the base 10 number to calculate the number of combinations
     * and to disambiguate the "quasi-canonical" state.
     *
     * This method also calculates the number of combinations as a by-product, but since it memoizes the
     * results, a subsequent call to combinationsFor(length) is O(1).
     *
     * @param number the base 10 representation of the bitset
     * @return the length of the DNA context represented by this number
     */
    private static int contextLengthFor(long number) {
        int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
        long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
        while (combinations <= number) { // find the length of the dna string (length)
            length++;
            combinations = combinationsFor(length); // calculate the next context
        }
        return length;
    }

    /**
     * The sum of all combinations of a context of a given length from length = 0 to length.
     *
     * Memoized implementation of sum(4^i), where i=[0,length]
     *
     * @param length the length of the DNA context
     * @return the sum of all combinations leading up to this context length.
     * @throws ReviewedStingException if length exceeds MAX_DNA_CONTEXT
     */
    private static long combinationsFor(int length) {
        if (length > MAX_DNA_CONTEXT)
            throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));

        // only calculate the number of combinations if the table hasn't already cached the value
        // (length <= 0 falls through to the zero-initialized table entry; index 0 is always 0)
        if (length > 0 && combinationsPerLength[length] == 0) {
            long combinations = 0L;
            for (int i = 1; i <= length; i++)
                combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
            combinationsPerLength[length] = combinations;
        }
        return combinationsPerLength[length];
    }

    /**
     * Serializes an object and returns its byte representation.
     *
     * NOTE(review): despite the name, this returns the serialized bytes themselves, not just a
     * size; callers presumably take .length of the result -- confirm before renaming.
     *
     * @param obj the object to serialize (must implement Serializable)
     * @return the serialized byte array of the object
     * @throws java.io.IOException if serialization fails
     */
    public static byte[] sizeOf(Object obj) throws java.io.IOException {
        ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
        objectOutputStream.writeObject(obj);
        objectOutputStream.flush();
        objectOutputStream.close();
        byteObject.close();

        return byteObject.toByteArray();
    }
}

View File

@ -29,7 +29,6 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal;
@ -1527,124 +1526,4 @@ public class MathUtils {
}
/**
* Creates an integer out of a bitset
*
* @param bitSet the bitset
* @return an integer with the bitset representation
*/
public static long intFrom(final BitSet bitSet) {
long number = 0;
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
number |= 1L << bitIndex;
return number;
}
/**
* Creates a BitSet representation of a given integer
*
* @param number the number to turn into a bitset
* @return a bitset representation of the integer
*/
public static BitSet bitSetFrom(long number) {
BitSet bitSet = new BitSet();
int bitIndex = 0;
while (number > 0) {
if (number%2 > 0)
bitSet.set(bitIndex);
bitIndex++;
number /= 2;
}
return bitSet;
}
/**
* Converts a BitSet into the dna string representation.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
* base_10 representation of the sequence. This is important for us to know how to bring the number
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
* as 0's and leading 0's are omitted).
*
* quasi-canonical because A is represented by a 0, therefore,
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
* we have : 0, 1, 2, 3, 00, 01, 02, ...
*
* but we can correctly decode it because we know the final length.
*
* @param bitSet the bitset representation of the dna sequence
* @return the dna sequence represented by the bitset
*/
public static String dnaFrom(final BitSet bitSet) {
long number = intFrom(bitSet); // the base_10 representation of the bit set
long preContext = 0; // the number of combinations skipped to get to the quasi-canonical representation (we keep it to subtract later)
long nextContext = 4; // the next context (we advance it so we know which one was preceding it).
int i = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
while (nextContext <= number) { // find the length of the dna string (i)
preContext = nextContext; // keep track of the number of combinations in the preceding context
nextContext += Math.pow(4, ++i);// calculate the next context
}
number -= preContext; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
String dna = "";
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
byte base = (byte) (number % 4);
switch (base) {
case 0 : dna = "A" + dna; break;
case 1 : dna = "C" + dna; break;
case 2 : dna = "G" + dna; break;
case 3 : dna = "T" + dna; break;
}
number /= 4;
}
for (int j = dna.length(); j < i; j++)
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
return dna;
}
/**
* Creates a BitSet representation of a given dna string.
*
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
* a bitSetFrom(BigNumber) method.
*
* The bit representation of a dna string is the simple:
* 0 A 4 AA 8 CA
* 1 C 5 AC ...
* 2 G 6 AG 1343 TTGGT
* 3 T 7 AT 1364 TTTTT
*
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
* preceded the string (with smaller lengths).
*
* @param dna the dna sequence
* @return the bitset representing the dna sequence
*/
public static BitSet bitSetFrom(String dna) {
if (dna.length() > 31)
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than 31. dna: %s (%d)", dna, dna.length()));
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
long preContext = 0; // the sum of all combinations that preceded the length of the dna string
for (int i=0; i<dna.length(); i++) {
baseTen *= 4;
switch(dna.charAt(i)) {
case 'A': baseTen += 0; break;
case 'C': baseTen += 1; break;
case 'G': baseTen += 2; break;
case 'T': baseTen += 3; break;
}
if (i>0)
preContext += Math.pow(4, i); // each length will have 4^i combinations (e.g 1 = 4, 2 = 16, 3 = 64, ...)
}
return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
}
}

View File

@ -43,7 +43,7 @@ import java.util.regex.Pattern;
/**
* Utility methods to facilitate on-the-fly base quality score recalibration.
*
*
* User: rpoplin
* Date: 2/4/12
*/
@ -58,7 +58,7 @@ public class BaseRecalibration {
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
public BaseRecalibration( final File RECAL_FILE ) {
public BaseRecalibration(final File RECAL_FILE) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
@ -68,27 +68,29 @@ public class BaseRecalibration {
// Read in the data from the csv file and populate the data map and covariates list
boolean sawEOF = false;
try {
for ( String line : new XReadLines(RECAL_FILE) ) {
for (String line : new XReadLines(RECAL_FILE)) {
lineNumber++;
if ( EOF_MARKER.equals(line) ) {
if (EOF_MARKER.equals(line)) {
sawEOF = true;
} else if( COMMENT_PATTERN.matcher(line).matches() ) {
}
else if (COMMENT_PATTERN.matcher(line).matches()) {
; // Skip over the comment lines, (which start with '#')
}
// Read in the covariates that were used from the input file
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
if( foundAllCovariates ) {
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
} else { // Found the covariate list in input file, loop through all of them and instantiate them
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
if (foundAllCovariates) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
}
else { // Found the covariate list in input file, loop through all of them and instantiate them
String[] vals = line.split(",");
for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
for (int iii = 0; iii < vals.length - 4; iii++) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
boolean foundClass = false;
for( Class<?> covClass : classes ) {
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
for (Class<?> covClass : classes) {
if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
foundClass = true;
try {
Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
Covariate covariate = (Covariate) covClass.newInstance();
requestedCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
@ -96,63 +98,65 @@ public class BaseRecalibration {
}
}
if( !foundClass ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
if (!foundClass) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
}
}
}
} else { // Found a line of data
if( !foundAllCovariates ) {
}
else { // Found a line of data
if (!foundAllCovariates) {
foundAllCovariates = true;
// At this point all the covariates should have been found and initialized
if( requestedCovariates.size() < 2 ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
if (requestedCovariates.size() < 2) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
}
final boolean createCollapsedTables = true;
// Initialize any covariate member variables using the shared argument collection
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
for( Covariate cov : requestedCovariates ) {
cov.initialize( RAC );
for (Covariate cov : requestedCovariates) {
cov.initialize(RAC);
}
// Initialize the data hashMaps
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
}
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
}
}
} catch ( FileNotFoundException e ) {
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
} catch ( NumberFormatException e ) {
} catch (NumberFormatException e) {
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
if ( !sawEOF ) {
if (!sawEOF) {
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
}
if( dataManager == null ) {
if (dataManager == null) {
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
}
dataManager.generateEmpiricalQualities( 1, MAX_QUALITY_SCORE );
dataManager.generateEmpiricalQualities(1, MAX_QUALITY_SCORE);
}
/**
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
*
* @param line A line of CSV data read from the recalibration table data file
*/
private void addCSVData(final File file, final String line) {
final String[] vals = line.split(",");
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
if (vals.length != requestedCovariates.size() + 4) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
}
@ -160,48 +164,48 @@ public class BaseRecalibration {
final Object[] key = new Object[requestedCovariates.size()];
Covariate cov;
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
cov = requestedCovariates.get( iii );
key[iii] = cov.getValue( vals[iii] );
for (iii = 0; iii < requestedCovariates.size(); iii++) {
cov = requestedCovariates.get(iii);
key[iii] = cov.getValue(vals[iii]);
}
final String modelString = vals[iii++];
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.errorModelFrom(modelString);
// Create a new datum using the number of observations, number of mismatches, and reported quality score
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
dataManager.addToAllTables(key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
}
public void recalibrateRead( final GATKSAMRecord read ) {
public void recalibrateRead(final GATKSAMRecord read) {
//compute all covariate values for this read
RecalDataManager.computeCovariates(read, requestedCovariates);
final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
final CovariateKeySet covariateKeySet = RecalDataManager.covariateKeySetFrom(read);
for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
final byte[] originalQuals = read.getBaseQualities( errorModel );
for (final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values()) {
final byte[] originalQuals = read.getBaseQualities(errorModel);
final byte[] recalQuals = originalQuals.clone();
// For each base in the read
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
for (int offset = 0; offset < read.getReadLength(); offset++) {
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length - 1); // need to strip off the error mode which was appended to the list of covariates
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
//if( qualityScore == null ) {
final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
final byte qualityScore = performSequentialQualityCalculation(errorModel, fullCovariateKey);
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
//}
recalQuals[offset] = qualityScore;
}
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
read.setBaseQualities( recalQuals, errorModel );
preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
read.setBaseQualities(recalQuals, errorModel);
}
}
@ -211,27 +215,28 @@ public class BaseRecalibration {
*
* Given the full recalibration table, we perform the following preprocessing steps:
*
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
* @param key The list of Comparables that were calculated from the covariates
* @return A recalibrated quality score as a byte
*/
private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
private byte performSequentialQualityCalculation(final RecalDataManager.BaseRecalibrationType errorModel, final Object... key) {
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
// The global quality shift (over the read group only)
readGroupCollapsedKey[0] = key[0];
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0, errorModel).get(readGroupCollapsedKey));
double globalDeltaQ = 0.0;
if( globalRecalDatum != null ) {
if (globalRecalDatum != null) {
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
@ -240,9 +245,9 @@ public class BaseRecalibration {
// The shift in quality between reported and empirical
qualityScoreCollapsedKey[0] = key[0];
qualityScoreCollapsedKey[1] = key[1];
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1, errorModel).get(qualityScoreCollapsedKey));
double deltaQReported = 0.0;
if( qReportedRecalDatum != null ) {
if (qReportedRecalDatum != null) {
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
@ -252,27 +257,28 @@ public class BaseRecalibration {
double deltaQCovariateEmpirical;
covariateCollapsedKey[0] = key[0];
covariateCollapsedKey[1] = key[1];
for( int iii = 2; iii < key.length; iii++ ) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
if( covariateRecalDatum != null ) {
for (int iii = 2; iii < key.length; iii++) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii, errorModel).get(covariateCollapsedKey));
if (covariateRecalDatum != null) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
}
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
}
/**
* Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
*
* @param originalQuals The list of original base quality scores
* @param recalQuals A list of the new recalibrated quality scores
* @param recalQuals A list of the new recalibrated quality scores
*/
private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
for( int iii = 0; iii < recalQuals.length; iii++ ) {
if( originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
for (int iii = 0; iii < recalQuals.length; iii++) {
if (originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
recalQuals[iii] = originalQuals[iii];
}
}

View File

@ -213,8 +213,8 @@ public class GATKSAMRecord extends BAMRecord {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
if( quals == null ) {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
}
return quals;

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -495,7 +496,7 @@ public class ReadUtils {
/**
* Is a base inside a read?
*
* @param read the read to evaluate
* @param read the read to evaluate
* @param referenceCoordinate the reference coordinate of the base to test
* @return true if it is inside the read, false otherwise.
*/
@ -541,9 +542,9 @@ public class ReadUtils {
*
* See getCoverageDistributionOfRead for information on how the coverage is calculated.
*
* @param list the list of reads covering the region
* @param list the list of reads covering the region
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return an array with the coverage of each position from startLocation to stopLocation
*/
public static int [] getCoverageDistributionOfReads(List<GATKSAMRecord> list, int startLocation, int stopLocation) {
@ -563,9 +564,9 @@ public class ReadUtils {
* Note: This function counts DELETIONS as coverage (since the main purpose is to downsample
* reads for variant regions, and deletions count as variants)
*
* @param read the read to get the coverage distribution of
* @param read the read to get the coverage distribution of
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return an array with the coverage of each position from startLocation to stopLocation
*/
public static int [] getCoverageDistributionOfRead(GATKSAMRecord read, int startLocation, int stopLocation) {
@ -611,9 +612,9 @@ public class ReadUtils {
* Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with value==true meaning it contributes to the coverage.
* Example: Read => {true, true, false, ... false}
*
* @param readList the list of reads to generate the association mappings
* @param readList the list of reads to generate the association mappings
* @param startLocation the first reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @param stopLocation the last reference coordinate of the region (inclusive)
* @return the two hashmaps described above
*/
public static Pair<HashMap<Integer, HashSet<GATKSAMRecord>> , HashMap<GATKSAMRecord, Boolean[]>> getBothReadToLociMappings (List<GATKSAMRecord> readList, int startLocation, int stopLocation) {
@ -622,7 +623,6 @@ public class ReadUtils {
HashMap<Integer, HashSet<GATKSAMRecord>> locusToReadMap = new HashMap<Integer, HashSet<GATKSAMRecord>>(2*(stopLocation - startLocation + 1), 0.5f);
HashMap<GATKSAMRecord, Boolean[]> readToLocusMap = new HashMap<GATKSAMRecord, Boolean[]>(2*readList.size(), 0.5f);
for (int i = startLocation; i <= stopLocation; i++)
locusToReadMap.put(i, new HashSet<GATKSAMRecord>()); // Initialize the locusToRead map with empty lists
@ -631,7 +631,7 @@ public class ReadUtils {
int [] readCoverage = getCoverageDistributionOfRead(read, startLocation, stopLocation);
for (int i=0; i<readCoverage.length; i++) {
for (int i = 0; i < readCoverage.length; i++) {
int refLocation = i + startLocation;
if (readCoverage[i] > 0) {
// Update the hash for this locus
@ -649,6 +649,55 @@ public class ReadUtils {
return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
}
/**
* Create random read qualities
*
* @param length the length of the read
* @return an array with randomized base qualities between 0 and 50
*/
public static byte[] createRandomReadQuals(int length) {
Random random = GenomeAnalysisEngine.getRandomGenerator();
byte[] quals = new byte[length];
for (int i = 0; i < length; i++)
quals[i] = (byte) random.nextInt(50);
return quals;
}
/**
* Create random read qualities
*
* @param length the length of the read
* @param allowNs whether or not to allow N's in the read
* @return an array with randomized bases (A-N) with equal probability
*/
public static byte[] createRandomReadBases(int length, boolean allowNs) {
Random random = GenomeAnalysisEngine.getRandomGenerator();
int numberOfBases = allowNs ? 5 : 4;
byte[] bases = new byte[length];
for (int i = 0; i < length; i++) {
switch (random.nextInt(numberOfBases)) {
case 0:
bases[i] = 'A';
break;
case 1:
bases[i] = 'C';
break;
case 2:
bases[i] = 'G';
break;
case 3:
bases[i] = 'T';
break;
case 4:
bases[i] = 'N';
break;
default:
throw new ReviewedStingException("Something went wrong, this is just impossible");
}
}
return bases;
}
public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) {
String[] sequenceRecordNames = new String[sequenceDictionary.size()];
int sequenceRecordIndex = 0;
@ -656,4 +705,5 @@ public class ReadUtils {
sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName();
return Arrays.deepToString(sequenceRecordNames);
}
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -12,37 +12,13 @@ import java.util.BitSet;
import java.util.Random;
/**
* Short one line description of the walker.
*
* <p>
* [Long description of the walker]
* </p>
*
*
* <h2>Input</h2>
* <p>
* [Description of the Input]
* </p>
*
* <h2>Output</h2>
* <p>
* [Description of the Output]
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class ContextCovariateUnitTest {
ContextCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
Random random;
@BeforeClass
public void init() {
@ -55,49 +31,35 @@ public class ContextCovariateUnitTest {
@Test(enabled = true)
public void testSimpleContexts() {
byte [] quals = createRandomReadQuals(101);
byte [] bbases = createRandomReadBases(101);
byte[] quals = ReadUtils.createRandomReadQuals(10000);
byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
String bases = stringFrom(bbases);
// System.out.println("Read: " + bases);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
CovariateValues values = covariate.getValues(read);
verifyCovariateArray((BitSet []) values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray((BitSet []) values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
}
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i=0; i<values.length; i++) {
if (i >= contextSize)
Assert.assertEquals(MathUtils.dnaFrom(values[i]), bases.substring(i-contextSize, i));
else
Assert.assertNull(values[i]);
for (int i = 0; i < values.length; i++) {
String expectedContext = covariate.NO_CONTEXT_VALUE;
if (i >= contextSize) {
String context = bases.substring(i - contextSize, i);
if (!context.contains("N"))
expectedContext = context;
}
// System.out.println(String.format("Context [%d]:\n%s\n%s\n", i, covariate.keyFromBitSet(values[i]), expectedContext));
Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
}
}
private String stringFrom(byte [] array) {
private String stringFrom(byte[] array) {
String s = "";
for (byte value : array)
s += (char) value;
return s;
}
private byte [] createRandomReadQuals(int length) {
byte [] quals = new byte[length];
for (int i=0; i<length; i++)
quals[i] = (byte) random.nextInt(50);
return quals;
}
private byte [] createRandomReadBases(int length) {
byte [] bases = new byte[length];
for (int i=0; i<length; i++) {
switch(random.nextInt(4)) {
case 0: bases[i] = 'A'; break;
case 1: bases[i] = 'C'; break;
case 2: bases[i] = 'G'; break;
case 3: bases[i] = 'T'; break;
}
}
return bases;
}
}

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.BitSet;
import java.util.Random;
/**
* @author Mauricio Carneiro
* @since 3/1/12
*/
public class CycleCovariateUnitTest {
CycleCovariate covariate;
RecalibrationArgumentCollection RAC;
Random random;
@BeforeClass
public void init() {
RAC = new RecalibrationArgumentCollection();
covariate = new CycleCovariate();
random = GenomeAnalysisEngine.getRandomGenerator();
covariate.initialize(RAC);
}
@Test(enabled = true)
public void testSimpleCycles() {
short readLength = 10;
byte[] quals = ReadUtils.createRandomReadQuals(readLength);
byte[] bbases = ReadUtils.createRandomReadBases(readLength, true);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
read.getReadGroup().setPlatform("illumina");
CovariateValues values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) 1, (short) 1);
read.setReadNegativeStrandFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
read.setReadPairedFlag(true);
read.setSecondOfPairFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
read.setReadNegativeStrandFlag(false);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
}
private void verifyCovariateArray(BitSet[] values, short init, short increment) {
for (short i = 0; i < values.length; i++) {
short actual = Short.decode(covariate.keyFromBitSet(values[i]));
int expected = init + (increment * i);
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
Assert.assertEquals(actual, expected);
}
}
}

View File

@ -0,0 +1,75 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.Random;
/**
* @author Mauricio Carneiro
* @since 3/5/12
*/
public class BitSetUtilsUnitTest {
private static int RANDOM_NUMBERS_TO_TRY = 87380;
private static Random random;
@BeforeClass
public void init() {
random = GenomeAnalysisEngine.getRandomGenerator();
}
@Test(enabled = true)
public void testLongBitSet() {
long[] numbers = {0L, 1L, 428L, 65536L, 239847L, 4611686018427387903L, Long.MAX_VALUE, Long.MIN_VALUE, -1L, -2L, -7L, -128L, -65536L, -100000L};
for (long n : numbers)
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n);
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
long n = random.nextLong();
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(n)), n); // Because class Random uses a seed with only 48 bits, this algorithm will not return all possible long values.
}
}
@Test(enabled = true)
public void testShortBitSet() {
short[] numbers = {0, 1, 428, 25934, 23847, 16168, Short.MAX_VALUE, Short.MIN_VALUE, -1, -2, -7, -128, -12312, -31432};
for (long n : numbers)
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
for (int i = 0; i < RANDOM_NUMBERS_TO_TRY; i++) {
short n = (short) random.nextInt();
Assert.assertEquals(BitSetUtils.shortFrom(BitSetUtils.bitSetFrom(n)), n);
}
}
@Test(enabled = true)
public void testDNAAndBitSetConversion() {
String[] dna = {"AGGTGTTGT", "CCCCCCCCCCCCCC", "GGGGGGGGGGGGGG", "TTTTTTTTTTTTTT", "GTAGACCGATCTCAGCTAGT", "AACGTCAATGCAGTCAAGTCAGACGTGGGTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"};
// Test all contexts of size 1-8.
for (long n = 0; n < RANDOM_NUMBERS_TO_TRY; n++)
Assert.assertEquals(BitSetUtils.longFrom(BitSetUtils.bitSetFrom(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(n)))), n);
// Test the special cases listed in the dna array
for (String d : dna)
Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
}
@Test(enabled = true)
public void testNumberOfBitsToRepresent() {
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented
for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number
long n1 = 1L << i;
long n2 = Math.abs(random.nextLong()) % n1;
long n3 = n1 | n2;
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
Assert.assertEquals(BitSetUtils.numberOfBitsToRepresent(n1), i);
}
}
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -131,7 +130,8 @@ public class MathUtilsUnitTest extends BaseTest {
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);
for (int i = 0; i < numbers.length; i++)
r.add((double) numbers[i]);
Assert.assertEquals((long) numbers.length, r.observationCount());
Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
@ -223,35 +223,6 @@ public class MathUtilsUnitTest extends BaseTest {
return set.isEmpty();
}
@Test(enabled = true)
public void testIntAndBitSetConversion() {
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(239847)), 239847);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(12726)), 12726);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(0)), 0);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(1)), 1);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(65536)), 65536);
Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(Long.MAX_VALUE)), Long.MAX_VALUE);
}
@Test(enabled = true)
public void testDNAAndBitSetConversion() {
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("ACGT")), "ACGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AGGTGTTGT")), "AGGTGTTGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("A")), "A");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("C")), "C");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("G")), "G");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("T")), "T");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CC")), "CC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AA")), "AA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AAAA")), "AAAA");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("CCCCCCCCCCCCCC")), "CCCCCCCCCCCCCC");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GGGGGGGGGGGGGG")), "GGGGGGGGGGGGGG");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("TTTTTTTTTTTTTT")), "TTTTTTTTTTTTTT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("GTAGACCGATCTCAGCTAGT")), "GTAGACCGATCTCAGCTAGT");
Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31)
}
@Test
public void testApproximateLog10SumLog10() {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
@ -267,54 +238,54 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3);
Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[] {-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3);
}
@Test
public void testNormalizeFromLog10() {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, -1.0, -1.1, -7.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[] {0.0, 0.0, 0.0, -0.1, -6.8}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[] {-8.9, -6.7, -9.4, 0.0, -8.9}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -1.0, -1.0, -1.0}), new double[] {0.25, 0.25, 0.25, 0.25}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -1.0}), new double[] {0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301}));
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
/**
* Private function used by testNormalizeFromLog10()
*/
private boolean compareDoubleArrays(double[] b1, double[] b2) {
if( b1.length != b2.length ) {
if (b1.length != b2.length) {
return false; // sanity check
}
for( int i=0; i < b1.length; i++ ){
if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 )
for (int i = 0; i < b1.length; i++) {
if (MathUtils.compareDoubles(b1[i], b2[i]) != 0)
return false;
}
return true;