The old recalibrator is replaced with the refactored recalibrator. Added a version message to the logger output. These walkers start at version 2.0.0

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2117 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-23 14:58:33 +00:00
parent dfe7d69471
commit 1d46de6d34
27 changed files with 981 additions and 2807 deletions

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.apache.log4j.Logger;
import javax.management.RuntimeErrorException;
import java.util.*;
/**
 * Accumulates base-quality recalibration observations, bucketed per read group
 * by machine cycle, reported quality score, and dinucleotide context.
 * Each read group is backed by its own RecalDataManager.
 */
public class CovariateCounter {
// If true, the machine-cycle (position) covariate is collapsed out of the bins.
private boolean collapsePos = false;
// If true, the dinucleotide-context covariate is collapsed out of the bins.
private boolean collapseDinuc = false;
// If true, reads from read groups absent from the header extend the table lazily instead of failing.
private boolean assumeFaultyHeader = false;
// Recalibration data per read group, keyed by read group id.
private HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
protected static Logger logger = Logger.getLogger(CovariateCounter.class);
/**
 * Sets up one RecalDataManager per read group named in the header.
 *
 * @param readGroups         read group ids to track
 * @param collapsePos        collapse the machine-cycle covariate out of the bins
 * @param collapseDinuc      collapse the dinucleotide covariate out of the bins
 * @param assumeFaultyHeader lazily add unexpected read groups instead of throwing
 */
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc, boolean assumeFaultyHeader ) {
this.collapsePos = collapsePos;
this.collapseDinuc = collapseDinuc;
this.assumeFaultyHeader = assumeFaultyHeader;
for (String readGroup : readGroups ) {
RecalDataManager manager = makeManager(readGroup);
data.put(readGroup, manager);
}
}
// Creates a manager that tracks exactly the covariates that are NOT collapsed.
private RecalDataManager makeManager(final String readGroup) {
return new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
}
/**
 * Returns the set of readGroup names we are counting covariates for
 * @return the read group ids currently tracked
 */
public Set<String> getReadGroups() {
return data.keySet();
}
public boolean isCollapseDinuc() {
return collapseDinuc;
}
public boolean isCollapsePos() {
return collapsePos;
}
/**
 * Returns the number of read groups being managed
 * @return count of tracked read groups
 */
public int getNReadGroups() {
return data.size();
}
/**
 * Get the particular RecalData datum associated with readGroup, at machine pos, with reported
 * quality qual, and with the dinuc context of prevBase, base. If an example of such a
 * base has been seen before, returns the associated RecalData. If not, it creates one, places it in the
 * system so that subsequent requests will return that object, and returns it.
 *
 * @param readGroup read group id of the read this base came from
 * @param pos       machine cycle (0-based)
 * @param qual      reported quality score
 * @param prevBase  base preceding this one in machine order (dinuc context)
 * @param base      the observed base itself
 * @return the (possibly freshly created) RecalData bin for these covariate values
 */
public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
if ( ! data.containsKey(readGroup) ) {
// Read group not declared in the header: either extend the table or fail hard.
if ( assumeFaultyHeader ) {
logger.info(String.format("Found unexpected read group, but assuming the header was bad, so extending covariates with read group %s", readGroup));
RecalDataManager manager = makeManager(readGroup);
data.put(readGroup, manager);
} else {
throw new RuntimeException(String.format("Unexpected read group %s found, there's something wrong with your BAM file's header", readGroup));
}
}
// Encode the dinucleotide context as a two-character string key.
byte[] cs = {(byte)prevBase, (byte)base};
String s = new String(cs);
return data.get(readGroup).expandingGetRecalData(pos, qual, s, true);
}
/**
 * Get a list of all of the RecalData associated with readGroup
 *
 * @param readGroup read group id
 * @return all recalibration bins collected so far for that read group
 */
public List<RecalData> getRecalData(String readGroup) {
return data.get(readGroup).getAll();
}
/**
 * Updates the recalibration data for the base at offset in the read, associated with readGroup rg.
 * Correctly handles machine orientation of the read. I.e., it adds data not by offset in the read
 * but by implied machine cycle associated with the offset.
 *
 * TODO: this whole system is 0-based and therefore inconsistent with the rest of the GATK, where pos is 1-based
 * TODO: and offset is 0-based. How very annoying.
 *
 * @param rg               read group id
 * @param read             the read containing the base
 * @param offset           0-based offset into the read; must be &gt; 0 so a previous base exists
 * @param ref              reference base at this locus
 * @param useOriginalQuals use the OQ-attribute qualities rather than the QUAL field
 * @return 1 if the base was counted, 0 if it was skipped (reported quality 0)
 */
public int updateDataFromRead( String rg, SAMRecord read, int offset, char ref, boolean useOriginalQuals ) {
if ( offset == 0 )
throw new RuntimeException("Illegal read offset " + offset + " in read " + read.getReadName());
int cycle = offset;
byte[] bases = read.getReadBases();
byte[] quals = RecalDataManager.getQualsForRecalibration(read, useOriginalQuals);
char base = (char)bases[offset];
char prevBase = (char)bases[offset - 1];
if (read.getReadNegativeStrandFlag()) {
// The machine read the reverse complement, so complement the bases; the "previous"
// base in machine order is the FOLLOWING base in reference order, and the machine
// cycle counts from the other end of the read.
// NOTE(review): bases[offset+1] is read unguarded — assumes callers never pass
// offset == readLength-1 for negative-strand reads; verify at call sites.
ref = BaseUtils.simpleComplement(ref);
base = BaseUtils.simpleComplement(base);
prevBase = BaseUtils.simpleComplement((char)bases[offset+1]);
cycle = read.getReadLength() - (offset + 1);
}
int qual = quals[offset];
if ( qual > 0 ) {
RecalData datum = getRecalData(rg, cycle, qual, prevBase, base);
if (datum != null) datum.inc(base,ref);
return 1;
} else {
return 0;
}
}
// Debugging aid: dump every non-empty recalibration bin to stdout.
public void printState() {
for ( String readGroup : getReadGroups() ) {
for ( RecalData datum : getRecalData(readGroup) ) {
if ( datum.N > 0 )
System.out.println(datum);
}
}
}
}

View File

@ -1,292 +1,485 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import java.util.*;
import java.io.PrintStream;
import java.io.FileNotFoundException;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
@WalkerName("CountCovariates")
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* This walker is designed to work as the first pass in a two-pass processing step.
* It does a by-locus traversal operating only at sites that are not in dbSNP.
* We assume that all mismatches we see are therefore errors.
* This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
* Since there is a large amount of data one can then calculate an empirical probability of error
* given the particular covariates seen at this site, where p(error) = num mismatches / num observations
* The output file is a CSV list of (several covariate values, num observations, num mismatches, empirical quality score)
* The first lines of the output file give the name of the covariate classes that were used for this calculation.
*
* Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and must be at the start of the list.
* Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
*/
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@WalkerName( "CountCovariates" )
//@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality // BUGBUG taken out to match old integration tests
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
@Argument(fullName="buggyMaxReadLen", doc="If we see a read longer than this, we assume there's a bug and abort", required=false)
public int buggyMaxReadLen = 100000;
@Argument(fullName="OUTPUT_FILEROOT", shortName="outroot", required=false, doc="Depreciated output file root -- now use --params to directly specify the file output name")
public String OUTPUT_FILEROOT = null; // going to blow up if specified
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
private Boolean LIST_ONLY = false;
@Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are already added for you.", required=false)
private String[] COVARIATES = null;
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
private boolean USE_ORIGINAL_QUALS = false;
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
private int WINDOW_SIZE = 3;
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. For debugging purposes only.")
private boolean NO_PRINT_HEADER = false;
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
private boolean VALIDATE_OLD_RECALIBRATOR = false;
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
private boolean USE_SLX_PLATFORM = false;
@Argument(fullName="params", shortName="params", required=false, doc="Filename root for the outputted logistic regression training files")
public String params = "output.recal_data.csv";
@Argument(fullName="MIN_MAPPING_QUALITY", shortName="minmap", required=false, doc="Only use reads with at least this quality score")
public int MIN_MAPPING_QUALITY = 1;
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
private int sizeOfReadDatumHashMap = 0;
@Argument(fullName="PLATFORM", shortName="pl", required=false, doc="Only calibrate read groups generated from the given platform (default = * for all platforms)")
public List<String> platforms = Collections.singletonList("*");
//public List<String> platforms = Collections.singletonList("ILLUMINA");
private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
@Argument(fullName="assumeFaultyHeader", required=false, doc="")
public boolean assumeFaultyHeader = false;
private final String versionNumber = "2.0.0"; // major version, minor version, and build number
//@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
public boolean collapsePos = false;
//@Argument(fullName="collapseDinuc", shortName="collapseDinuc", required=false, doc="")
public boolean collapseDinuc = false;
@Argument(fullName = "useOriginalQuals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
public boolean useOriginalQuals = false;
private CovariateCounter covariateCounter = null;
private long counted_sites = 0; // number of sites used to count covariates
private long counted_bases = 0; // number of bases used to count covariates
private long skipped_sites = 0; // number of sites skipped because of a dbSNP entry
// THIS IS A HACK required in order to reproduce the behavior of old (and imperfect) RODIterator and
// hence to pass the integration test. The new iterator this code is now using does see ALL the SNPs,
// whether masked by overlapping indels/other events or not.
//TODO process correctly all the returned dbSNP rods at each location
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0l, 0l); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0l, 0l); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
private static final int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen)
private int lociSinceLastDbsnpCheck = 0; // loci since last dbsnp validation
//---------------------------------------------------------------------------------------------------------------
//
// initialize
//
//---------------------------------------------------------------------------------------------------------------
/**
* Initialize the system. Setup the data CovariateCountry for the read groups in our header
* Parse the -cov arguments and create a list of covariates to be used here
* Based on the covariates' estimates for initial capacity allocate the data hashmap
*/
public void initialize() {
Set<String> readGroups = new HashSet<String>();
for (SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
if( readGroup.getAttribute("PL") == null )
Utils.warnUser(String.format("PL attribute for read group %s is unset; assuming all reads are supported",readGroup.getReadGroupId()));
if( !isSupportedReadGroup(readGroup) )
continue;
readGroups.add(readGroup.getReadGroupId());
logger.info( "CovariateCounterWalker version: " + versionNumber );
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
// Print and exit if that's what was requested
if ( LIST_ONLY ) {
out.println( "Available covariates:" );
for( Class<?> covClass : classes ) {
out.println( covClass.getSimpleName() );
}
out.println();
System.exit( 0 ); // early exit here because user requested it
}
// Warn the user if no dbSNP file was specified
if( this.getToolkit().getArguments().DBSNPFile == null ) {
boolean foundDBSNP = false;
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
foundDBSNP = true;
}
}
if( !foundDBSNP ) {
Utils.warnUser("This calculation is critically dependent on being able to skip over known variant sites. Are you sure you want to be running without a dbSNP rod specified?");
}
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc, assumeFaultyHeader);
logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups()));
}
// --------------------------------------------------------------------------------------------------------------
//
// map
//
// --------------------------------------------------------------------------------------------------------------
// Initialize the requested covariates by parsing the -cov argument
requestedCovariates = new ArrayList<Covariate>();
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
if( VALIDATE_OLD_RECALIBRATOR ) {
requestedCovariates.add( new ReadGroupCovariate() );
requestedCovariates.add( new CycleCovariate() ); // unfortunately order is different here in order to match the old recalibrator exactly
requestedCovariates.add( new QualityScoreCovariate() );
requestedCovariates.add( new DinucCovariate() );
} else if( COVARIATES != null ) {
if(COVARIATES[0].equalsIgnoreCase("ALL")) { // the user wants ALL covariates to be used
requestedCovariates.add( new ReadGroupCovariate() ); // first add the required covariates then add the rest by looping over all implementing classes that were found
requestedCovariates.add( new QualityScoreCovariate() );
for( Class<?> covClass : classes ) {
try {
Covariate covariate = (Covariate)covClass.newInstance();
estimatedCapacity *= covariate.estimatedNumberOfBins();
// Some covariates need parameters (user supplied command line arguments) passed to them
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // these were already added so don't add them again
requestedCovariates.add( covariate );
}
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
} catch ( IllegalAccessException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
}
}
} else { // The user has specified a list of several covariates
int covNumber = 1;
for( String requestedCovariateString : COVARIATES ) {
boolean foundClass = false;
for( Class<?> covClass : classes ) {
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
foundClass = true;
// Read Group Covariate and Quality Score Covariate are required covariates for the recalibration calculation and must begin the list
if( (covNumber == 1 && !requestedCovariateString.equalsIgnoreCase( "ReadGroupCovariate" )) ||
(covNumber == 2 && !requestedCovariateString.equalsIgnoreCase( "QualityScoreCovariate" )) ) {
throw new StingException("ReadGroupCovariate and QualityScoreCovariate are required covariates for the recalibration calculation and must begin the list" );
}
covNumber++;
try {
// Now that we've found a matching class, try to instantiate it
Covariate covariate = (Covariate)covClass.newInstance();
estimatedCapacity *= covariate.estimatedNumberOfBins();
// Some covariates need parameters (user supplied command line arguments) passed to them
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
requestedCovariates.add( covariate );
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
} catch ( IllegalAccessException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
}
}
}
/**
* Walk over each read in the locus pileup and update the covariate counts based on these bases and their
* matching (or not) with ref. dbSNP aware, so avoids sites that are known as SNPs in DBSNP.
*
* @param tracker
* @param ref
* @param context
* @return
*/
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
// long testpos = 10410913 ;
// if ( ref.getLocus().getStart() == testpos ) {
// System.out.println(rods.size()+" rods:");
// for ( ReferenceOrderedDatum d : rods.getRecords() ) System.out.println((((rodDbSNP)d).isSNP()?"SNP":"non-SNP")+" at " + d.getLocation());
// System.exit(1);
// }
if ( dbsnp == null ) {
// We aren't at a dbSNP position that's a SNP, so update the read
// if ( ref.getLocus().getStart() == testpos) System.out.println("NOT A SNP INDEED");
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
for (int i =0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
if ( read.getReadLength() > buggyMaxReadLen ) {
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
}
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
throw new RuntimeException("No read group annotation found for read " + read.format());
}
if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) {
int offset = offsets.get(i);
if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count
counted_bases += covariateCounter.updateDataFromRead(readGroup.getReadGroupId(), read, offset, ref.getBase(), useOriginalQuals);
if( !foundClass ) {
throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." );
}
}
}
counted_sites += 1;
updateMismatchCounts(novel_counts, context, ref.getBase());
} else {
// if ( ref.getLocus().getStart() == testpos) System.out.println("TREATED AS A SNP");
// out.println(ref.getLocus()+" SNP at "+dbsnp.getLocation() );
updateMismatchCounts(dbSNP_counts, context, ref.getBase());
skipped_sites += 1;
} else { // No covariates were specified by the user so add the default, required ones
Utils.warnUser( "Using default set of covariates because none were specified. Using ReadGroupCovariate and QualityScoreCovariate only." );
requestedCovariates.add( new ReadGroupCovariate() );
requestedCovariates.add( new QualityScoreCovariate() );
estimatedCapacity = 300 * 40;
}
if ( ++lociSinceLastDbsnpCheck == DBSNP_VALIDATION_CHECK_FREQUENCY ) {
lociSinceLastDbsnpCheck = 0;
validateDbsnpMismatchRate();
}
logger.info( "The covariates being used here: " );
logger.info( requestedCovariates );
return 1;
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
dataManager = new RecalDataManager( estimatedCapacity );
readDatumHashMap = new IdentityHashMap<SAMRecord, ReadHashDatum>();
}
/**
 * Tallies mismatch / total base counts over the pileup at one locus.
 *
 * @param counts  accumulator pair: first = mismatching bases, second = total regular bases
 * @param context pileup of reads and their offsets at this locus
 * @param ref     reference base at this locus
 */
private static void updateMismatchCounts(Pair<Long, Long> counts, AlignmentContext context, char ref) {
    final List<SAMRecord> pileupReads = context.getReads();
    final List<Integer> pileupOffsets = context.getOffsets();
    final int numReads = pileupReads.size();
    for( int readIndex = 0; readIndex < numReads; readIndex++ ) {
        final char observedBase = pileupReads.get(readIndex).getReadString().charAt(pileupOffsets.get(readIndex));
        final int observedIndex = BaseUtils.simpleBaseToBaseIndex(observedBase);
        final int referenceIndex = BaseUtils.simpleBaseToBaseIndex(ref);
        if( observedIndex == -1 || referenceIndex == -1 ) {
            continue; // skip ambiguous bases such as 'N' on either the read or the reference
        }
        if( observedIndex != referenceIndex ) {
            counts.first++;
        }
        counts.second++;
    }
}
/**
 * Sanity-checks that dbSNP loci mismatch the reference sufficiently more often than
 * novel loci; warns the user if the ratio looks suspicious.
 */
private void validateDbsnpMismatchRate() {
    final long novelTotal = novel_counts.second;
    final long dbsnpTotal = dbSNP_counts.second;
    // Nothing to compare until both classes of loci have accumulated bases.
    if( novelTotal == 0 || dbsnpTotal == 0 ) {
        return;
    }
    final double novelRate = (double) novel_counts.first / (double) novelTotal;
    final double dbsnpRate = (double) dbSNP_counts.first / (double) dbsnpTotal;
    if( dbsnpRate < DBSNP_VS_NOVEL_MISMATCH_RATE * novelRate ) {
        Utils.warnUser("The variation rate of dbSNP sites seems suspicious! Please double-check that the correct DBSNP ROD is being used.");
    }
}
/**
 * Check to see whether this read group should be processed. A read group is supported when
 * "*" was requested, when platform information is missing entirely (null read group or unset
 * PL attribute), or when its PL attribute matches one of the requested platforms.
 *
 * @param readGroup the read group record to test (may be null)
 * @return true if reads from this group should be calibrated
 */
private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) {
    for( String requested : platforms ) {
        final String platform = requested.trim();
        if( platform.equals("*") ) {
            return true; // wildcard: calibrate everything
        }
        if( readGroup == null || readGroup.getAttribute("PL") == null ) {
            return true; // no platform information available — give it the benefit of the doubt
        }
        if( readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) ) {
            return true;
        }
    }
    return false;
}
// --------------------------------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------------------------------
//
// Reduce
// map
//
// --------------------------------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------------------------------
/**
* Provide an initial value for reduce computations.
* @return Initial value of reduce.
* For each read at this locus get the various covariate values and increment that location in the map based on
* whether or not the base matches the reference at this particular location
* @param tracker The reference metadata tracker
* @param ref The reference context
* @param context The alignment context
* @return Returns 1, but this value isn't used in the reduce step
*/
// Opens the recalibration parameter output stream that reduce() threads through the traversal.
// The removed OUTPUT_FILEROOT argument fails fast before any file is touched.
public PrintStream reduceInit() {
    if ( OUTPUT_FILEROOT != null ) {
        throw new RuntimeException("OUTPUT_FILEROOT argument has been removed, please use --params from now on to directly specify the output parameter filename");
    }
    try {
        return new PrintStream( params );
    } catch ( FileNotFoundException e ) {
        throw new RuntimeException("Couldn't open output file", e);
    }
}
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
// Called once after the traversal: dump the accumulated recalibration table and close the stream.
public void onTraversalDone( PrintStream recalTableStream ) {
    out.print("Writing raw recalibration data...");
    writeRecalTable( recalTableStream );
    out.println("...done");
    recalTableStream.close();
}
/**
 * Prints some basic information about the CountCovariates run as comment lines
 * at the head of the recalibration table.
 *
 * @param out stream receiving the header comment lines
 */
private void printInfo(PrintStream out) {
    final double sitesPerSkip = (double) counted_sites / skipped_sites;
    out.printf("# collapsed_pos %b%n", collapsePos);
    out.printf("# collapsed_dinuc %b%n", collapseDinuc);
    out.printf("# counted_sites %d%n", counted_sites);
    out.printf("# counted_bases %d%n", counted_bases);
    out.printf("# skipped_sites %d%n", skipped_sites);
    out.printf("# fraction_skipped 1 / %.0f bp%n", sitesPerSkip);
}
/**
* Writes out the key recalibration data collected from the reads. Dumps this recalibration data
* as a CVS string to the recalTableOut PrintStream. Emits the data for all read groups into this file.
*/
private void writeRecalTable(PrintStream recalTableStream) {
printInfo(recalTableStream);
recalTableStream.println("rg,pos,Qrep,dn,nBases,nMismatches,Qemp");
for (String readGroup : new TreeSet<String>(covariateCounter.getReadGroups()) ) {
for ( RecalData datum: RecalData.sort(covariateCounter.getRecalData(readGroup)) ) {
if ( datum.N > 0 ) {
recalTableStream.println(datum.toCSVString(collapsePos));
// pull out anything passed by -B name,type,file that has the name "dbsnp"
final RODRecordList<ReferenceOrderedDatum> dbsnpRODs = tracker.getTrackData( "dbsnp", null );
boolean isSNP = false;
if (dbsnpRODs != null) {
for( ReferenceOrderedDatum rod : dbsnpRODs ) {
if( ((Variation)rod).isSNP() ) {
isSNP = true; // at least one of the rods says this is a snp site
break;
}
}
}
// Only use data from non-dbsnp sites
// Assume every mismatch at a non-dbsnp site is indicitive of poor quality
if( !isSNP ) {
final List<SAMRecord> reads = context.getReads();
final List<Integer> offsets = context.getOffsets();
SAMRecord read;
int offset;
String readGroupId;
byte[] quals;
byte[] bases;
byte refBase;
byte prevBase;
String platform;
byte[] colorSpaceQuals;
ReadHashDatum readDatum;
boolean isNegStrand;
int mappingQuality;
int length;
final int numReads = reads.size();
// For each read at this locus
for( int iii = 0; iii < numReads; iii++ ) {
read = reads.get(iii);
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
readDatum = readDatumHashMap.get( read );
if( readDatum == null ) {
// If the HashMap of read objects has grown too large then throw out the (mostly stale) reads
if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger?
readDatumHashMap.clear();
sizeOfReadDatumHashMap = 0;
}
// This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
isNegStrand = read.getReadNegativeStrandFlag();
final SAMReadGroupRecord readGroup = read.getReadGroup();
readGroupId = readGroup.getReadGroupId();
platform = readGroup.getPlatform();
mappingQuality = read.getMappingQuality();
length = bases.length;
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length );
readDatumHashMap.put( read, readDatum );
sizeOfReadDatumHashMap++;
}
if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
// skip first and last base because there is no dinuc
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
if( offset > 0 ) {
if( offset < readDatum.length - 1 ) {
// skip if base quality is zero
if( readDatum.quals[offset] > 0 ) {
refBase = (byte)ref.getBase();
prevBase = readDatum.bases[offset-1];
// Get the complement base strand if we are a negative strand read
if( readDatum.isNegStrand ) {
prevBase = readDatum.bases[offset+1];
}
// skip if this base or the previous one was an 'N' or etc.
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) {
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
colorSpaceQuals = null;
if( readDatum.platform.equalsIgnoreCase("SOLID") ) {
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
}
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
{
// This base finally passed all the checks, so add it to the big hashmap
updateDataFromRead( readDatum, offset, refBase );
}
} else {
if( VALIDATE_OLD_RECALIBRATOR ) {
countedBases++; // replicating a small bug in the old recalibrator
}
}
}
} else { // at the last base in the read so we can remove it from our IdentityHashMap
readDatumHashMap.remove( read );
sizeOfReadDatumHashMap--;
}
}
}
}
countedSites++;
} else { // We skipped over the dbSNP site
skippedSites++;
}
return 1; // This value isn't actually used anywhere
}
/**
* Doesn't do anything
*
* @param empty
* @param recalTableStream
* @return
* Major workhorse routine for this walker.
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
* adding one to the number of observations and potentially one to the number of mismatches
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
* because pulling things out of the SAMRecord is an expensive operation.
* @param readDatum The ReadHashDatum holding all the important properties of this read
* @param offset The offset in the read for this locus
* @param refBase The reference base at this locus
*/
public PrintStream reduce(Integer empty, PrintStream recalTableStream) {
return recalTableStream;
private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) {
List<Comparable> key = new ArrayList<Comparable>();
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
for( Covariate covariate : requestedCovariates ) {
key.add( covariate.getValue( readDatum, offset ) );
}
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
RecalDatum datum = dataManager.data.get( key );
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
if( VALIDATE_OLD_RECALIBRATOR ) {
dataManager.data.myPut( key, datum );
} else {
dataManager.data.put( key, datum );
}
}
// Need the bases to determine whether or not we have a mismatch
byte base = readDatum.bases[offset];
// Add one to the number of observations and potentially one to the number of mismatches
datum.increment( (char)base, (char)refBase ); // dangerous: if you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad
countedBases++;
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
 * @return returns A PrintStream created from the -rf filename
 * @throws RuntimeException (wrapping FileNotFoundException) if the output file can't be opened
 */
public PrintStream reduceInit() {
    try {
        return new PrintStream( RECAL_FILE );
    } catch ( FileNotFoundException e ) {
        // include the offending filename so the user can see which path failed
        throw new RuntimeException( "Couldn't open output file: " + RECAL_FILE, e );
    }
}
/**
 * Reduce is a deliberate no-op for this walker; the running PrintStream is simply threaded through
 * the traversal so that onTraversalDone() can write the final table to it.
 * @param value Result of the map. This value is immediately ignored.
 * @param recalTableStream The PrintStream used to output the CSV data
 * @return returns The PrintStream used to output the CSV data
 */
public PrintStream reduce( Integer value, PrintStream recalTableStream ) {
    return recalTableStream; // carry the stream forward unchanged
}
/**
 * Invoked once the traversal has finished: dumps the accumulated recalibration data to disk in CSV form
 * and closes the output stream.
 * @param recalTableStream The PrintStream to write out to
 */
public void onTraversalDone( PrintStream recalTableStream ) {
    logger.info( "Writing raw recalibration data..." );
    outputToCSV( recalTableStream );
    logger.info( "...done!" );
    recalTableStream.close(); // flush and release the output file handle
}
/**
 * For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format.
 * Two formats are produced: a legacy layout (when VALIDATE_OLD_RECALIBRATOR is set) and the new layout
 * whose "@!" header lines name the Covariate classes for TableRecalibrationWalker to re-instantiate.
 * @param recalTableStream The PrintStream to write out to
 */
private void outputToCSV( final PrintStream recalTableStream ) {
    if( VALIDATE_OLD_RECALIBRATOR ) {
        // Legacy header: fixed lines replicating the old recalibrator's output exactly
        recalTableStream.printf("# collapsed_pos false%n");
        recalTableStream.printf("# collapsed_dinuc false%n");
        recalTableStream.printf("# counted_sites %d%n", countedSites);
        recalTableStream.printf("# counted_bases %d%n", countedBases);
        recalTableStream.printf("# skipped_sites %d%n", skippedSites);
        // NOTE(review): prints "Infinity" when skippedSites == 0 -- confirm downstream parsers tolerate this
        recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
        recalTableStream.printf("rg,pos,Qrep,dn,nBases,nMismatches,Qemp%n");
        // For each entry in the data hashmap, in the sorted order the old recalibrator used
        for( Pair<List<? extends Comparable>,RecalDatum> entry : dataManager.data.entrySetSorted4() ) {
            // For each Covariate in the key
            for( Comparable comp : entry.first ) {
                // Output the Covariate's value
                recalTableStream.print( comp + "," );
            }
            // Output the RecalDatum entry
            recalTableStream.println( entry.second.outputToCSV() );
        }
    } else {
        if( !NO_PRINT_HEADER ) {
            recalTableStream.printf("# Counted Sites %d%n", countedSites);
            recalTableStream.printf("# Counted Bases %d%n", countedBases);
            recalTableStream.printf("# Skipped Sites %d%n", skippedSites);
            // NOTE(review): same potential division by zero as above when skippedSites == 0
            recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
            for( Covariate cov : requestedCovariates ) {
                // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
                recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
            }
        }
        // For each entry in the data hashmap (unsorted iteration order)
        for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
            // For each Covariate in the key
            for( Comparable comp : entry.getKey() ) {
                // Output the Covariate's value
                if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG: suppresses String-valued covariates (e.g. read group) when headers are off
                recalTableStream.print( comp + "," );
            }
            // Output the RecalDatum entry
            recalTableStream.println( entry.getValue().outputToCSV() );
        }
    }
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
/**
* Created by IntelliJ IDEA.

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.Utils;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.StingException;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
/**
* Created by IntelliJ IDEA.

View File

@ -1,230 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.List;
import java.util.Collections;
/**
 * A single recalibration observation bucket: the context (read group, cycle position, reported
 * quality, dinucleotide) plus the empirical counts of observed bases (N) and mismatches (B).
 * Sorts by read group, then position, then reported quality, then dinuc.
 */
public class RecalData implements Comparable<RecalData> {
    //
    // context of this data -- read group, position in read, reported quality, and dinuc
    //
    String readGroup; // the read group where the datum was observed
    int pos;          // the position of this recalibration datum
    int qual;         // the reported quality score of this datum
    String dinuc;     // the dinucleotide context of this datum

    //
    // empirical quality score data
    //
    long N;           // total number of observed bases
    long B;           // number of observed mismatches to the reference base

    public RecalData(int pos, int qual, String readGroup, String dinuc ) {
        this.pos = pos;
        this.qual = qual;
        this.readGroup = readGroup;
        this.dinuc = dinuc;
    }

    /** Copy constructor: duplicates both the context fields and the observation counts. */
    public RecalData( RecalData copy ) {
        this.pos = copy.pos;
        this.qual = copy.qual;
        this.readGroup = copy.readGroup;
        this.dinuc = copy.dinuc;
        this.N = copy.N;
        this.B = copy.B;
    }

    /**
     * Orders by read group, then position, then reported quality, then dinuc.
     * Uses primitive comparisons instead of allocating Integer wrappers for each call.
     */
    public int compareTo(RecalData to) {
        int cmpReadGroup = readGroup.compareTo(to.readGroup);
        if (cmpReadGroup != 0) return cmpReadGroup;
        if (pos != to.pos) return pos < to.pos ? -1 : 1;
        if (qual != to.qual) return qual < to.qual ? -1 : 1;
        return dinuc.compareTo(to.dinuc);
    }

    /**
     * Returns the expected number of mismatching bases for this datum, using the number of bases N and
     * the reported qual score. Effectively qual implied error rate * number of bases.
     *
     * @return expected error count under the reported quality
     */
    public double getExpectedErrors() {
        return QualityUtils.qualToErrorProb((byte)qual) * N;
    }

    //
    // Increment routines
    //

    /** Add incN observations and incB mismatches to self; returns self for chaining. */
    public RecalData inc(long incN, long incB) {
        N += incN;
        B += incB;
        return this;
    }

    /**
     * Add N and B from other to self
     *
     * @param other the datum whose counts are folded into this one
     * @return this, for chaining
     */
    public RecalData inc(final RecalData other) {
        return inc(other.N, other.B);
    }

    /**
     * Add all of the Ns and Bs for data to self
     *
     * @param data the data whose counts are folded into this one
     * @return this, for chaining
     */
    public RecalData inc(List<RecalData> data) {
        for ( RecalData other : data ) {
            this.inc(other);
        }
        return this;
    }

    /**
     * Count an empirical observation of our current base against the reference base.
     * Increments N and, if curBase != ref, also increments B
     *
     * @param curBase the observed base
     * @param ref     the reference base
     * @return this, for chaining
     */
    public RecalData inc(char curBase, char ref) {
        // compare via base indices so case and ambiguity codes are handled uniformly
        return inc(1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1);
    }

    /**
     * Get the integer dinuc index associated with this datum. Can return -1 if dinuc
     * contains unexpected bases. Returns 0 if dinuc contains the * operator (not tracking dinucs)
     * @return dinuc index in [0, NDINUCS), or -1 for unexpected bases
     */
    public int getDinucIndex() {
        return dinucIndex(this.dinuc);
    }

    /**
     * Calculates the empirical quality score for this datum from B and N. Bounds the result
     * by QualityUtils.MAX_REASONABLE_Q_SCORE.
     *
     * @param useRawQempirical if false, applies add-one smoothing to both B and N
     * @return the (possibly smoothed) empirical quality, capped at MAX_REASONABLE_Q_SCORE
     */
    public double empiricalQualDouble(final boolean useRawQempirical) {
        double doubleB = useRawQempirical ? B : B + 1;
        double doubleN = useRawQempirical ? N : N + 1;
        double empiricalQual = -10 * Math.log10(doubleB / doubleN);
        if (empiricalQual > QualityUtils.MAX_REASONABLE_Q_SCORE) empiricalQual = QualityUtils.MAX_REASONABLE_Q_SCORE;
        return empiricalQual;
    }
    public double empiricalQualDouble() { return empiricalQualDouble(true); }

    /**
     * As as empiricalQualDouble, but rounded and encoded as a byte for placement in a read quality array of bytes
     * @param useRawQempirical if false, applies add-one smoothing to both B and N
     * @return the empirical quality as a phred-encoded byte
     */
    public byte empiricalQualByte(final boolean useRawQempirical) {
        double doubleB = useRawQempirical ? B : B + 1;
        double doubleN = useRawQempirical ? N : N + 1;
        return QualityUtils.probToQual(1.0 - doubleB / doubleN);
    }
    public byte empiricalQualByte() { return empiricalQualByte(true); }

    // NOTE(review): this column order (pos, rg, ...) does not match toCSVString()'s (rg, pos, ...)
    // -- confirm against the consumers of this header before relying on it
    public static String headerString() {
        return ("pos, rg, dinuc, qual, emp_qual, qual_diff, n, b");
    }

    /** Human-readable debugging representation; not the CSV serialization. */
    public String toString() {
        return String.format("[rg=%s, pos=%3d, Qrep=%3d, dinuc=%s, Qemp=%2.2f / %2.2f, B=%d, N=%d]", readGroup, pos, qual, dinuc, empiricalQualDouble(), empiricalQualDouble(false), B, N);
    }

    //
    // To and from CSV strings
    //

    /**
     * Serializes this datum in the order: rg,pos,Qrep,dn,nBases,nMismatches,Qemp
     * @param collapsedPos if true, the position column is emitted as "*"
     * @return the CSV line for this datum
     */
    public String toCSVString(boolean collapsedPos) {
        // rg,pos,Qrep,dn,nBases,nMismatches,Qemp
        return String.format("%s,%s,%d,%s,%d,%d,%d", readGroup, collapsedPos ? "*" : pos, qual, dinuc, N, B, empiricalQualByte());
    }

    /**
     * Parses the string s and returns the encoded RecalData in it. Assumes data has been emitted by
     * toCSVString() routine. Order is readGroup, pos, qual, dinuc, N, B, Qemp as byte
     * @param s the CSV line to parse
     * @return the decoded datum
     * @throws NumberFormatException on malformed numbers or out-of-range values
     */
    public static RecalData fromCSVString(String s) throws NumberFormatException {
        String[] vals = s.split(",");
        String rg = vals[0];
        int pos = vals[1].equals("*") ? 0 : Integer.parseInt(vals[1]); // "*" means collapsed position
        int qual = Integer.parseInt(vals[2]);
        String dinuc = vals[3];
        int N = Integer.parseInt(vals[4]);
        int B = Integer.parseInt(vals[5]);
        RecalData datum = new RecalData(pos, qual, rg, dinuc);
        datum.B = B;
        datum.N = N;

        // Checking for badness
        if ( pos < 0 ) throw new NumberFormatException("Illegal position detected: " + pos);
        if ( B < 0 ) throw new NumberFormatException("Illegal mismatch count detected: " + B);
        if ( N < 0 ) throw new NumberFormatException("Illegal base count detected: " + N);
        if ( qual < 0 || qual > QualityUtils.MAX_QUAL_SCORE ) throw new NumberFormatException("Illegal qual detected: " + qual);

        return datum;
    }

    /** Sorts l in place (by compareTo order) and returns it for convenience. */
    public static List<RecalData> sort(List<RecalData> l) {
        Collections.sort(l);
        return l;
    }

    /** Maps a (prevBase, base) pair to a dinuc index in [0, NDINUCS), or -1 for non-regular bases. */
    public static int bases2dinucIndex(char prevBase, char base) {
        int pbI = BaseUtils.simpleBaseToBaseIndex(prevBase);
        int bI = BaseUtils.simpleBaseToBaseIndex(base);
        return (pbI == -1 || bI == -1) ? -1 : pbI * 4 + bI;
    }

    public final static int NDINUCS = 16; // 4 bases x 4 bases

    /** Inverse of bases2dinucIndex: reconstructs the two-character dinuc string. */
    public static String dinucIndex2bases(int index) {
        char[] data = {BaseUtils.baseIndexToSimpleBase(index / 4), BaseUtils.baseIndexToSimpleBase(index % 4)};
        return new String( data );
    }

    /** Dinuc index for a two-character string; assumes s.length() >= 2. */
    public static int dinucIndex(String s) {
        return bases2dinucIndex(s.charAt(0), s.charAt(1));
    }

    public static int dinucIndex(byte prevBase, byte base) {
        return bases2dinucIndex((char)prevBase, (char)base);
    }

    /**
     * Phred-scaled aggregate reported quality over a list of data: total expected errors / total bases.
     * NOTE(review): yields NaN for an empty list or zero total bases -- confirm callers never pass one.
     * @param l the data to aggregate
     * @return the combined reported quality
     */
    public static double combinedQreported(List<RecalData> l) {
        double sumExpectedErrors = 0;
        long nBases = 0;
        for ( RecalData datum : l ) {
            sumExpectedErrors += datum.getExpectedErrors();
            nBases += datum.N;
        }
        return QualityUtils.phredScaleErrorRate(sumExpectedErrors / nBases);
    }
}

View File

@ -1,195 +1,204 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.ExpandingArrayList;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*;
import net.sf.samtools.SAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: mdepristo
* Date: Jun 16, 2009
* Time: 9:55:10 PM
* To change this template use File | Settings | File Templates.
* User: rpoplin
* Date: Nov 6, 2009
*
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
*/
public class RecalDataManager {
/** The original quality scores are stored in this attribute */
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
public NHashMap<RecalDatum> data; // the full dataset
private NHashMap<RecalDatum> dataCollapsedReadGroup; // table where everything except read group has been collapsed
private NHashMap<RecalDatum> dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // tables where everything except read group, quality score, and given covariate has been collapsed
public NHashMap<Double> dataSumExpectedErrors; // table used to calculate the overall aggregate quality score in which everything except read group is collapsed
ArrayList<RecalData> flattenData = new ArrayList<RecalData>();
boolean trackPos, trackDinuc;
String readGroup;
int nDinucs, maxReadLen;
private NHashMap<Double> dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
private NHashMap<Double> dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
private ArrayList<NHashMap<Double>> dataCollapsedByCovariateDouble; // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
//RecalData[][][] data = null;
ExpandingArrayList<ExpandingArrayList<ExpandingArrayList<RecalData>>> data = null;
public RecalDataManager(String readGroup,
//int maxReadLen, int maxQual, int nDinucs,
boolean trackPos, boolean trackDinuc) {
data = new ExpandingArrayList<ExpandingArrayList<ExpandingArrayList<RecalData>>>();
//data = new RecalData[maxReadLen+1][maxQual+1][nDinucs];
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // the tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // the tag that holds the color space quality scores for SOLID bams
this.readGroup = readGroup;
this.trackPos = trackPos;
this.trackDinuc = trackDinuc;
//this.maxReadLen = maxReadLen;
//this.nDinucs = nDinucs;
RecalDataManager() {
data = new NHashMap<RecalDatum>();
}
public int getMaxReadLen() {
return data.size();
}
public int getPosIndex(int pos) {
return trackPos ? pos : 0;
}
public int canonicalPos(int cycle) {
return getPosIndex(cycle);
}
public int getDinucIndex(String dinuc) {
return trackDinuc ? RecalData.dinucIndex(dinuc) : 0;
}
public int getDinucIndex(byte prevBase, byte base) {
return trackDinuc ? RecalData.dinucIndex(prevBase, base) : 0;
}
public String canonicalDinuc(String dinuc) {
return trackDinuc ? dinuc : "**";
}
public void addDatum(RecalData datum) {
if ( ! datum.readGroup.equals(this.readGroup) ) {
throw new RuntimeException(String.format("BUG: adding incorrect read group datum %s to RecalDataManager for %s", datum.readGroup, this.readGroup));
}
RecalData prev = getRecalData(datum.pos, datum.qual, datum.dinuc);
if ( prev != null ) {
if ( trackDinuc && trackPos )
throw new RuntimeException(String.format("Duplicate entry discovered: %s vs. %s", getRecalData(datum.pos, datum.qual, datum.dinuc), datum));
prev.inc(datum);
RecalDataManager( final int estimatedCapacity, final boolean createCollapsedTables, final int numCovariates ) {
if( createCollapsedTables ) { // initialize all the collapsed tables
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
dataCollapsedQualityScore = new NHashMap<RecalDatum>();
dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
}
dataSumExpectedErrors = new NHashMap<Double>();
} else {
int posIndex = getPosIndex(datum.pos);
int internalDinucIndex = getDinucIndex(datum.dinuc);
if ( internalDinucIndex == -1 ) return;
set(posIndex, datum.qual, internalDinucIndex, datum);
flattenData.add(datum);
}
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f);
}
}
public RecalData getRecalData(int pos, int qual, String dinuc) {
return expandingGetRecalData(pos, qual, dinuc, false);
}
public RecalData expandingGetRecalData(int pos, int qual, String dinuc, boolean expandP) {
int posIndex = getPosIndex(pos);
int internalDinucIndex = getDinucIndex(dinuc);
if ( internalDinucIndex == -1 ) return null;
RecalData datum = get(posIndex, qual, internalDinucIndex);
if ( datum == null && expandP ) {
//System.out.printf("Allocating %s %d %d %d%n", readGroup, pos, qual, dinuc_index);
datum = new RecalData(posIndex, qual, readGroup, trackDinuc ? dinuc : "**");
set(posIndex, qual, internalDinucIndex, datum);
flattenData.add(datum);
}
return datum;
RecalDataManager( final int estimatedCapacity ) {
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f ); // second arg is the 'loading factor',
// a number to monkey around with when optimizing performace of the HashMap
}
/**
* Returns the RecalData associated with pos, qual, dinuc, or null if not is bound.
* Does not expand the system to corporate a new data point
* @param posIndex
* @param qual
* @param dinucIndex
* @return
* Add the given mapping to all of the collapsed hash tables
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
*/
private RecalData get(int posIndex, int qual, int dinucIndex) {
ExpandingArrayList<ExpandingArrayList<RecalData>> d2 = data.get(posIndex);
if (d2 == null) return null;
ExpandingArrayList<RecalData> d3 = d2.get(qual);
return d3 == null ? null : d3.get(dinucIndex);
}
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum ) {
private void set(int posIndex, int qual, int dinucIndex, RecalData datum) {
// grow data if necessary
ExpandingArrayList<ExpandingArrayList<RecalData>> d2 = data.get(posIndex);
if (d2 == null) {
d2 = new ExpandingArrayList<ExpandingArrayList<RecalData>>();
data.set(posIndex, d2);
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with just the read group
RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
// Expand d2 if necessary
ExpandingArrayList<RecalData> d3 = d2.get(qual);
if ( d3 == null ) {
d3 = new ExpandingArrayList<RecalData>();
d2.set(qual, d3);
// create dataSumExpectedErrors, the table used to calculate the overall aggregate quality score in which everything except read group is collapsed
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with just the read group
Double sumExpectedErrors = dataSumExpectedErrors.get( newKey );
if( sumExpectedErrors == null ) {
dataSumExpectedErrors.put( newKey, 0.0 );
} else {
dataSumExpectedErrors.remove( newKey );
sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * fullDatum.getNumObservations();
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
}
// set d3 to datum
d3.set(dinucIndex, datum);
}
newKey = new ArrayList<Comparable>();
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score
collapsedDatum = dataCollapsedQualityScore.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedQualityScore.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
private RecalData getMatchingDatum(List<RecalData> l, RecalData datum,
boolean combinePos, boolean combineQual, boolean combineDinuc) {
for ( RecalData find : l ) {
if ( (combineQual || find.qual == datum.qual) &&
(combinePos || find.pos == datum.pos ) &&
(combineDinuc || find.dinuc.equals(datum.dinuc)) ) {
//System.out.printf("Found %s for %s%n", find, datum);
return find;
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { // readGroup and QualityScore aren't counted
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score ...
newKey.add( key.get(iii + 2) ); // and the given covariate
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
if( collapsedDatum == null ) {
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
}
RecalData d = new RecalData(combinePos ? 0 : datum.pos, combineQual ? 0 : datum.qual, datum.readGroup, combineDinuc ? "**" : datum.dinuc );
//System.out.printf("Making new match %s%n", d);
l.add(d);
return d;
}
public List<RecalData> combineDinucs() {
return combine(false, false, true);
}
/**
* Loop over all the collapsed tables and turn the recalDatums found there into an empricial quality score
* that will be used in the sequential calculation in TableRecalibrationWalker
* @param numCovariates The number of covariates you have determines the number of tables to create
* @param smoothing The smoothing paramter that goes into empirical quality score calculation
*/
public final void generateEmpiricalQualities( final int numCovariates, final int smoothing ) {
public List<RecalData> combineCycles() {
return combine(true, false, false);
}
public List<RecalData> combine(boolean ignorePos, boolean ignoreQual, boolean ignoreDinuc) {
List<RecalData> l = new ArrayList<RecalData>();
for ( RecalData datum : flattenData ) {
RecalData reduced = getMatchingDatum(l, datum, ignorePos, ignoreQual, ignoreDinuc);
//System.out.printf("Combining %s with %s%n", datum, reduced);
reduced.inc(datum);
dataCollapsedReadGroupDouble = new NHashMap<Double>();
dataCollapsedQualityScoreDouble = new NHashMap<Double>();
dataCollapsedByCovariateDouble = new ArrayList<NHashMap<Double>>();
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
dataCollapsedByCovariateDouble.add( new NHashMap<Double>() );
}
return l;
}
public List<RecalData> getAll() {
return flattenData;
}
// we can process the OQ field if requested
public static byte[] getQualsForRecalibration( SAMRecord read, boolean useOriginalQuals ) {
byte[] quals = read.getBaseQualities();
if ( useOriginalQuals && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
// Hash the empirical quality scores so we don't have to call Math.log at every base for every read
// Looping over the entrySet is really expensive but worth it
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedReadGroup.entrySet() ) {
dataCollapsedReadGroupDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedQualityScore.entrySet() ) {
dataCollapsedQualityScoreDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedByCovariate.get(iii).entrySet() ) {
dataCollapsedByCovariateDouble.get(iii).put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
}
return quals;
dataCollapsedQualityScore.clear();
dataCollapsedByCovariate.clear();
dataCollapsedQualityScore = null; // will never need this again
dataCollapsedByCovariate = null; // will never need this again
if( data!=null ) {
data.clear();
data = null; // will never need this again
}
}
/**
* Get the appropriate collapsed table out of the set of all the tables held by this Object
* @param covariate Which covariate indexes the desired collapsed HashMap
* @return The desired collapsed HashMap
*/
public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
if( covariate == 0) {
return dataCollapsedReadGroup; // table where everything except read group has been collapsed
} else if( covariate == 1 ) {
return dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
} else {
return dataCollapsedByCovariate.get( covariate - 2 ); // table where everything except read group, quality score, and given covariate has been collapsed
}
}
/**
* Get the appropriate collapsed table of emprical quality out of the set of all the tables held by this Object
* @param covariate Which covariate indexes the desired collapsed NHashMap<Double>
* @return The desired collapsed NHashMap<Double>
*/
public final NHashMap<Double> getCollapsedDoubleTable( final int covariate ) {
if( covariate == 0) {
return dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
} else if( covariate == 1 ) {
return dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
} else {
return dataCollapsedByCovariateDouble.get( covariate - 2 ); // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
}
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;

View File

@ -1,3 +1,22 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.io.File;
import java.io.FileNotFoundException;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -23,311 +42,408 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.recalibration;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal.
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
* For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
* Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read.
* This walker then outputs a new bam file with these updated (recalibrated) reads.
*
* Note: This walker expects as input the recalibration table file generated previously by CovariateCounterWalker.
* Note: This walker is designed to be used in conjunction with CovariateCounterWalker.
*/
@WalkerName("TableRecalibration")
@Requires({DataSource.READS}) // , DataSource.REFERENCE})
@Requires({ DataSource.READS, DataSource.REFERENCE }) // This walker requires -I input.bam, it also requires -R reference.fasta, but by not saying @requires REFERENCE_BASES I'm telling the
// GATK to not actually spend time giving me the refBase array since I don't use it
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="params", shortName="params", doc="CountCovariates params file", required=true)
public String paramsFile;
@Argument(fullName="outputBam", shortName="outputBam", doc="output BAM file", required=true)
public SAMFileWriter outputBam = null;
@Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
private String RECAL_FILE = null;
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=false)
private SAMFileWriter OUTPUT_BAM = null;
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
doc="If provided, bases with quality scores less than this threshold won't be recalibrated. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
private int PRESERVE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
private boolean USE_ORIGINAL_QUALS = false;
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
private int WINDOW_SIZE = 3;
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
private int SMOOTHING = 1;
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
private boolean VALIDATE_OLD_RECALIBRATOR = false;
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
private boolean USE_SLX_PLATFORM = false;
@Argument(shortName="rawQempirical", doc="If provided, we will use raw Qempirical scores calculated from the # mismatches and # bases, rather than the more conservative estimate of # mismatches + 1 / # bases + 1", required=false)
public boolean rawQempirical = false;
//public enum RecalibrationMode {
// COMBINATORIAL,
// SEQUENTIAL,
// ERROR
//}
@Argument(fullName="preserveQScoresLessThan", shortName="pQ", doc="If provided, bases with quality scores less than this threshold won't be recalibrated. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
public int preserveQScoresLessThan = 5;
//@Argument(fullName="recalibrationMode", shortName="mode", doc="Which calculation to use when recalibrating, default is SEQUENTIAL", required=false)
private String MODE_STRING = "SEQUENTIAL";
//public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: do we need to support the other modes?
@Argument(fullName = "useOriginalQuals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
public boolean useOriginalQuals = false;
//
// Basic static information
//
private static Logger logger = Logger.getLogger(TableRecalibrationWalker.class);
private static String VERSION = "0.2.5";
// maps from [readGroup] -> [prevBase x base -> [cycle, qual, new qual]]
HashMap<String, RecalMapping> cache = new HashMap<String, RecalMapping>();
public enum RecalibrationMode {
COMBINATORIAL,
JAFFE,
BY_POS_ONLY,
BY_DINUC_ONLY,
SEQUENTIAL,
ERROR
}
@Argument(shortName="mode", doc="", required=false)
public String modeString = RecalibrationMode.SEQUENTIAL.toString();
public RecalibrationMode mode = RecalibrationMode.ERROR;
private RecalDataManager dataManager;
private ArrayList<Covariate> requestedCovariates;
private ArrayList<Comparable> fullCovariateKey; // the list that will be used over and over again to hold the full set of covariate values
private ArrayList<Comparable> collapsedTableKey; // the key that will be used over and over again to query the collapsed tables
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static Pattern COLLAPSED_POS_PATTERN = Pattern.compile("^#\\s+collapsed_pos\\s+(\\w+)");
private static Pattern COLLAPSED_DINUC_PATTERN = Pattern.compile("^#\\s+collapsed_dinuc\\s+(\\w+)");
private static Pattern HEADER_PATTERN = Pattern.compile("^rg.*");
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
private final String versionNumber = "2.0.0"; // major version, minor version, and build number
//---------------------------------------------------------------------------------------------------------------
//
// initialize
//
//---------------------------------------------------------------------------------------------------------------
/**
* Read in the recalibration table input file.
* Parse the list of covariate classes used during CovariateCounterWalker.
* Parse the CSV data and populate the hashmap.
*/
public void initialize() {
logger.info("TableRecalibrator version: " + VERSION);
//
// crappy hack until Enum arg types are supported
//
for ( RecalibrationMode potential : RecalibrationMode.values() ) {
if ( potential.toString().equals(modeString)) {
mode = potential;
break;
}
}
if ( mode == RecalibrationMode.ERROR )
throw new RuntimeException("Unknown mode requested: " + modeString);
logger.info( "TableRecalibrationWalker version: " + versionNumber );
// Get a list of all available covariates
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
//
// Walk over the data file, parsing it into recalibration data
//
int lineNumber = 0;
try {
logger.info(String.format("Reading data..."));
List<RecalData> data = new ArrayList<RecalData>();
boolean collapsedPos = false;
boolean collapsedDinuc = false;
boolean foundAllCovariates = false;
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
//List<String> lines = new xReadLines(new File(paramsFile)).readLines();
for ( String line : new xReadLines(new File(paramsFile)) ) {
// Warn the user if a dbSNP file was specified since it isn't being used here
boolean foundDBSNP = false;
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
foundDBSNP = true;
}
}
if( foundDBSNP ) {
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
}
fullCovariateKey = new ArrayList<Comparable>();
collapsedTableKey = new ArrayList<Comparable>();
// Read in the covariates that were used from the input file
requestedCovariates = new ArrayList<Covariate>();
// Read in the data from the csv file and populate the map
logger.info( "Reading in the data from input file..." );
try {
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
lineNumber++;
if ( HEADER_PATTERN.matcher(line).matches() )
continue;
if ( COMMENT_PATTERN.matcher(line).matches() ) {
collapsedPos = parseCommentLine(COLLAPSED_POS_PATTERN, line, collapsedPos);
collapsedDinuc = parseCommentLine(COLLAPSED_DINUC_PATTERN, line, collapsedDinuc);
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
; // skip over the comment lines, (which start with '#')
}
else {
data.add(RecalData.fromCSVString(line));
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // the line string is either specifying a covariate or is giving csv data
if( foundAllCovariates ) {
throw new StingException( "Malformed input recalibration file. Found covariate names intermingled with data. " + RECAL_FILE );
} else { // found another covariate in input file
boolean foundClass = false;
for( Class<?> covClass : classes ) {
if( line.equalsIgnoreCase( "@!" + covClass.getSimpleName() ) ) { // the "@!" was added by CovariateCounterWalker as a code to recognize covariate class names
foundClass = true;
try {
Covariate covariate = (Covariate)covClass.newInstance();
// some covariates need parameters (user supplied command line arguments) passed to them
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
requestedCovariates.add( covariate );
estimatedCapacity *= covariate.estimatedNumberOfBins();
} catch ( InstantiationException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
} catch ( IllegalAccessException e ) {
throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
}
}
}
if( !foundClass ) {
throw new StingException( "Malformed input recalibration file. The requested covariate type (" + line + ") isn't a valid covariate option." );
}
}
} else { // found some data
if( !foundAllCovariates ) {
if( VALIDATE_OLD_RECALIBRATOR ) {
requestedCovariates.add( new ReadGroupCovariate() );
requestedCovariates.add( new QualityScoreCovariate() );
requestedCovariates.add( new CycleCovariate() );
requestedCovariates.add( new DinucCovariate() );
}
foundAllCovariates = true;
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
final boolean createCollapsedTables = true;
// Initialize the data hashMaps
dataManager = new RecalDataManager( estimatedCapacity, createCollapsedTables, requestedCovariates.size() );
}
addCSVData(line); // parse the line and add the data to the HashMap
}
}
initializeCache(data, collapsedPos, collapsedDinuc);
} catch ( FileNotFoundException e ) {
Utils.scareUser("Cannot read/find parameters file " + paramsFile);
Utils.scareUser("Can not find input file: " + RECAL_FILE);
} catch ( NumberFormatException e ) {
throw new RuntimeException("Error parsing recalibration data at line " + lineNumber, e);
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
}
logger.info( "...done!" );
private boolean parseCommentLine(Pattern pat, String line, boolean flag) {
Matcher m = pat.matcher(line);
if ( m.matches() ) {
flag = Boolean.parseBoolean(m.group(1));
}
logger.info( "The covariates being used here: " );
logger.info( requestedCovariates );
return flag;
}
private void initializeCache(List<RecalData> data, boolean collapsedPos, boolean collapsedDinuc ) {
Set<String> readGroups = new HashSet<String>();
Set<String> dinucs = new HashSet<String>();
int maxPos = -1;
int maxQReported = -1;
logger.info(String.format("No. params : %d", data.size()));
// get the maximum data from the file
for ( RecalData datum : data ) {
readGroups.add(datum.readGroup);
dinucs.add(datum.dinuc);
maxPos = Math.max(maxPos, datum.pos);
maxQReported = Math.max(maxQReported, datum.qual);
}
logger.info(String.format("Read groups : %d %s", readGroups.size(), readGroups.toString()));
logger.info(String.format("Dinucs : %d %s", dinucs.size(), dinucs.toString()));
logger.info(String.format("Max pos : %d", maxPos));
logger.info(String.format("Max Q reported : %d", maxQReported));
// check the mode
switch ( mode ) {
case JAFFE:
collapsedPos = collapsedDinuc = true;
break;
case BY_POS_ONLY:
if ( collapsedPos )
throw new RuntimeException(String.format("Cannot perform position_only recalibration -- data is already partially collapsed by pos=%b and dinuc=%b", collapsedPos, collapsedDinuc));
collapsedPos = true;
throw new RuntimeException("Unsupported mode requested, sorry");
//break;
case BY_DINUC_ONLY:
if ( collapsedDinuc )
throw new RuntimeException(String.format("Cannot perform dinuc_only recalibration -- data is already partially collapsed by pos=%b and dinuc=%b", collapsedPos, collapsedDinuc));
collapsedDinuc = true;
throw new RuntimeException("Unsupported mode requested, sorry");
//break;
case COMBINATORIAL:
if ( collapsedPos || collapsedDinuc )
throw new RuntimeException(String.format("Cannot perform combinatorial recalibration -- data is already collapsed by pos=%b and dinuc=%b", collapsedPos, collapsedDinuc));
case SEQUENTIAL:
if ( collapsedPos || collapsedDinuc )
throw new RuntimeException(String.format("Cannot perform sequential recalibration -- data is already collapsed by pos=%b and dinuc=%b", collapsedPos, collapsedDinuc));
}
//
// initialize the data structures
//
HashMap<String, RecalDataManager> managers = new HashMap<String, RecalDataManager>();
for ( String readGroup : readGroups ) {
// create a manager for each read group
RecalDataManager manager = new RecalDataManager(readGroup, ! collapsedPos, ! collapsedDinuc);
managers.put(readGroup, manager);
}
// add the data to their appropriate readGroup managers
for ( RecalData datum : data ) {
managers.get(datum.readGroup).addDatum(datum);
}
// fill in the table with mapping objects
for ( String readGroup : readGroups ) {
RecalDataManager manager = managers.get(readGroup);
RecalMapping mapper = initializeMapper(manager, rawQempirical, dinucs, maxPos, maxQReported);
logger.info(String.format("Creating mapper for %s of class %s", readGroup, mapper));
cache.put(readGroup, mapper);
// Create the collapsed tables that are used in the sequential calculation
if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
logger.info( "...done!" );
}
}
/**
* Returns a new RecalMapping object appropriate for the requested mode in this.mode for the RecalData
* in manager. Uses the dinucs set dinucs, maxPos, and maxQReported to build efficient lookup structures
* for mapping from old qual and features -> new qual
*
* @param manager
* @param dinucs
* @param maxPos
* @param maxQReported
* @return
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
* @param line A line of CSV data read from the recalibration table data file
*/
private RecalMapping initializeMapper( RecalDataManager manager, final boolean rawQempirical,
Set<String> dinucs, int maxPos, int maxQReported ) {
switch ( mode ) {
case COMBINATORIAL:
case JAFFE:
return new CombinatorialRecalMapping(manager, rawQempirical, dinucs, maxPos, maxQReported);
case SEQUENTIAL:
return new SerialRecalMapping(manager, rawQempirical, dinucs, maxPos, maxQReported);
default:
throw new RuntimeException("Unimplemented recalibration mode: " + mode);
/**
 * Parse one line of CSV data from the recalibration table file and add it to the recalibration tables.
 * One column is consumed per requested covariate to build the lookup key; the remaining two columns
 * are the counts used to construct the datum.
 * @param line A line of CSV data read from the recalibration table data file
 */
private void addCSVData(String line) {
    String[] vals = line.split(",");
    // The full covariate key under which this datum is stored in the recalibration tables
    ArrayList<Comparable> key = new ArrayList<Comparable>();
    Covariate cov;
    int iii;
    for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
        cov = requestedCovariates.get( iii );
        if( VALIDATE_OLD_RECALIBRATOR ) {
            // The old recalibrator wrote columns 1 and 2 in the opposite order, so swap them when validating
            if( iii == 1 ) { // order is different in the old recalibrator
                key.add( cov.getValue( vals[2] ) );
            } else if ( iii == 2 ) {
                key.add( cov.getValue( vals[1] ) );
            } else {
                key.add( cov.getValue( vals[iii] ) );
            }
        } else {
            key.add( cov.getValue( vals[iii] ) );
        }
    }
    // After the loop, iii points at the first count column; the two trailing columns are the
    // observation/mismatch counts -- presumably (num observations, num mismatches); confirm against RecalDatum
    RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ) );
    dataManager.addToAllTables( key, datum );
}
public SAMRecord map(char[] ref, SAMRecord read) {
//---------------------------------------------------------------------------------------------------------------
//
// map
//
//---------------------------------------------------------------------------------------------------------------
/**
* For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
* @param refBases References bases over the length of the read
* @param read The read to be recalibrated
* @return The read with quality scores replaced
*/
public SAMRecord map( char[] refBases, SAMRecord read ) {
// WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
// This is done in order to speed up the code
byte[] originalQuals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
originalQuals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
byte[] recalQuals = originalQuals.clone();
// These calls are all expensive so only do them once for each read
final SAMReadGroupRecord readGroup = read.getReadGroup();
final String readGroupId = readGroup.getReadGroupId();
String platform = readGroup.getPlatform();
final boolean isNegStrand = read.getReadNegativeStrandFlag();
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
byte[] bases = read.getReadBases();
int startPos = 1;
int stopPos = bases.length;
byte[] quals = RecalDataManager.getQualsForRecalibration(read, useOriginalQuals);
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
if (read.getReadNegativeStrandFlag()) {
bases = BaseUtils.simpleReverseComplement(bases);
quals = BaseUtils.reverse(quals);
if( isNegStrand ) { // DinucCovariate is responsible for getting the complement base if needed
startPos = 0;
stopPos = bases.length - 1;
}
try {
byte[] recalQuals = recalibrateBasesAndQuals(read.getAttribute("RG").toString(), bases, quals);
ReadHashDatum readDatum = new ReadHashDatum( readGroupId, platform, originalQuals, bases, isNegStrand, read.getMappingQuality(), bases.length );
// Don't change Q scores below some threshold
preserveQScores(quals, recalQuals, read);
// For each base in the read
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
if (read.getReadNegativeStrandFlag()) { // reverse the quals for the neg strand read
recalQuals = BaseUtils.reverse(recalQuals);
quals = BaseUtils.reverse(quals);
}
read.setBaseQualities(recalQuals);
if ( read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) {
read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(quals));
// Get the covariate values which make up the key
for( Covariate covariate : requestedCovariates ) {
fullCovariateKey.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
}
return read;
} catch ( StingException e ) {
throw new RuntimeException(String.format("Bug found while processing read %s: %s", read.format(), e.getMessage()));
recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
fullCovariateKey.clear();
}
}
private void preserveQScores( byte[] originalQuals, byte[] recalQuals, SAMRecord read ) {
for ( int i = 0; i < recalQuals.length; i++ ) {
if ( originalQuals[i] < preserveQScoresLessThan ) {
//System.out.printf("Preserving Q%d base at %d in read %s%n", originalQuals[i], i, read.getReadName());
recalQuals[i] = originalQuals[i];
}
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
// SOLID bams insert the reference base into the read if the color space quality is zero, so don't change their base quality scores
if( platform.equalsIgnoreCase("SOLID") ) {
byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
if(colorSpaceQuals != null) { preserveBadColorSpaceQualities_SOLID( originalQuals, recalQuals, colorSpaceQuals ); }
}
read.setBaseQualities(recalQuals); // overwrite old qualities with new recalibrated qualities
if ( read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if the tag isn't already taken in the read
read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(originalQuals));
}
return read;
}
/**
* Workhorse routine. Given a read group and an array of bases and quals, returns a new set of recalibrated
* qualities for each base/qual in bases and quals. Uses the RecalMapping object associated with readGroup.
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
*
* @param readGroup
* @param bases
* @param quals
* @return
* Given the full recalibration table, we perform the following preprocessing steps:
*
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
* @param key The list of Comparables that were calculated from the covariates
* @return A recalibrated quality score as a byte
*/
public byte[] recalibrateBasesAndQuals(final String readGroup, byte[] bases, byte[] quals) throws StingException {
if ( readGroup == null ) { throw new StingException("BUG: read group is null"); }
if ( bases == null ) { throw new StingException("BUG: bases array is null"); }
if ( bases.length == 0) { throw new StingException("BUG: bases array is size 0"); }
if ( quals == null ) { throw new StingException("BUG: quals array is null"); }
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
byte[] recalQuals = new byte[quals.length];
RecalMapping mapper = cache.get(readGroup);
if ( mapper == null ) {
// throw new StingException(String.format("BUG: couldn't find RecalMapping for readgroup %s", readGroup));
// Edited by ebanks on 8/9/09
// Actually, there is a valid case when the mapper == null: when all reads in a RG are
// unmapped or have mapping quality == 0. In this case, we don't want to die - nor do we
// want to lose these reads - so we just don't alter the quals
for ( int cycle = 0; cycle < bases.length; cycle++ )
recalQuals[cycle] = quals[cycle];
return recalQuals;
String readGroupKeyElement = key.get(0).toString();
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
byte qualFromRead = (byte)qualityScoreKeyElement;
// The global quality shift (over the read group only)
collapsedTableKey.add( readGroupKeyElement );
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get(collapsedTableKey);
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get(collapsedTableKey);
double globalDeltaQ = 0.0;
double aggregrateQreported = 0.0;
if( globalDeltaQDatum != null ) {
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get(collapsedTableKey) / ((double) globalDeltaQDatum.getNumObservations()) );
globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported;
}
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
for ( int cycle = 1; cycle < bases.length; cycle++ ) { // skip first and last base, qual already set because no dinuc
byte qual = quals[cycle];
byte newQual = mapper.getNewQual(readGroup, bases[cycle - 1], bases[cycle], cycle, qual);
if ( newQual <= 0 || newQual > QualityUtils.MAX_REASONABLE_Q_SCORE )
throw new StingException(String.format("Bug found -- assigning bad quality score %d x %d => %d", cycle, qual, newQual));
recalQuals[cycle] = newQual;
//System.out.printf("Mapping %d => %d%n", qual, newQual);
// The shift in quality between reported and empirical
collapsedTableKey.add( qualityScoreKeyElement );
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get(collapsedTableKey);
double deltaQReported = 0.0;
if( deltaQReportedEmpirical != null ) {
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
// The shift in quality due to each covariate by itself in turn
double deltaQCovariates = 0.0;
Double deltaQCovariateEmpirical;
double deltaQPos = 0.0;
double deltaQDinuc = 0.0;
for( int iii = 2; iii < key.size(); iii++ ) {
collapsedTableKey.add( key.get(iii) ); // the given covariate
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get(collapsedTableKey);
if( deltaQCovariateEmpirical != null ) {
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
if( VALIDATE_OLD_RECALIBRATOR ) {
if(iii==2) { deltaQPos = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); } // BUGBUG: only here to validate against the old recalibrator
if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
}
}
collapsedTableKey.remove( 2 ); // this new covariate is always added in at position 2 in the collapsedTableKey list
}
return recalQuals;
}
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
public SAMFileWriter reduceInit() {
return outputBam;
// Verbose printouts used to validate with old recalibrator
//if(key.contains(null)) {
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
//}
//else {
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
//}
collapsedTableKey.clear();
return newQualityByte;
}
/**
* Write out the read
* Loop of the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
* @param originalQuals The list of original base quality scores
* @param recalQuals A list of the new recalibrated quality scores
*/
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
/**
 * Undo the recalibration for any base whose original quality fell below the preservation
 * threshold, restoring its original score in place.
 * @param originalQuals The list of original base quality scores
 * @param recalQuals A list of the new recalibrated quality scores, overwritten in place where preserved
 */
private void preserveQScores( final byte[] originalQuals, byte[] recalQuals ) {
    final int numQuals = recalQuals.length;
    for( int index = 0; index < numQuals; index++ ) {
        final byte originalScore = originalQuals[index];
        if( originalScore < PRESERVE_QSCORES_LESS_THAN ) {
            recalQuals[index] = originalScore; // score too low to safely recalibrate; keep the original
        }
    }
}
/**
 * Undo the recalibration for any base whose color space quality is non-positive, restoring its
 * original score in place. SOLID bams insert the reference base into the read when the color
 * space quality is zero, so those base qualities should not be changed.
 * @param originalQuals The list of original base quality scores
 * @param recalQuals A list of the new recalibrated quality scores, overwritten in place where preserved
 * @param colorSpaceQuals The list of color space quality scores for this read
 */
private void preserveBadColorSpaceQualities_SOLID( final byte[] originalQuals, byte[] recalQuals, final byte[] colorSpaceQuals ) {
    int index = 0;
    while( index < recalQuals.length ) {
        // NOTE(review): the <= 0 test was flagged BUGBUG in the original as not exactly correct yet
        if( colorSpaceQuals[index] <= 0 ) {
            recalQuals[index] = originalQuals[index];
        }
        index++;
    }
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
//
//---------------------------------------------------------------------------------------------------------------
/**
 * Start the reduce with a handle to the output bam file.
 * @return The SAMFileWriter pointing to the output bam file (may be null if none was provided)
 */
public SAMFileWriter reduceInit() {
    return OUTPUT_BAM;
}
/**
* Output each read to disk
* @param read The read to output
* @param output The FileWriter to write the read to
* @return The FileWriter
*/
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
if ( output != null ) {
output.addAlignment(read);
} else {
@ -337,191 +453,3 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
return output;
}
}
/**
 * Maps a base's reported quality score, together with its context (read group, preceding base,
 * current base, and machine cycle), to a recalibrated quality score.
 */
interface RecalMapping {
    public byte getNewQual(final String readGroup, byte prevBase, byte base, int cycle, byte qual);
}
/**
 * RecalMapping backed by a full combinatorial lookup table: one [position][reported qual] table of
 * recalibrated quality bytes per dinucleotide, populated directly from the RecalData entries held
 * by the manager.
 */
class CombinatorialRecalMapping implements RecalMapping {
    // One byte[pos][qual] table per dinuc, indexed by the manager's dinuc index
    ArrayList<byte[][]> cache;
    RecalDataManager manager;

    /**
     * Build the per-dinuc lookup tables from all data held by the manager.
     * @param manager source of the RecalData entries and of the pos/dinuc index mappings
     * @param useRawQempirical whether to use the raw empirical quality rather than the smoothed estimate
     * @param dinucs the set of dinucleotides observed in the data (one table is allocated per entry)
     * @param maxPos largest position observed in the data (tables sized maxPos+1)
     * @param maxQReported largest reported quality observed in the data (tables sized maxQReported+1)
     */
    public CombinatorialRecalMapping(RecalDataManager manager, final boolean useRawQempirical,
                                     Set<String> dinucs, int maxPos, int maxQReported ) {
        this.manager = manager;

        // initialize the data structure
        cache = new ArrayList<byte[][]>(RecalData.NDINUCS);
        for ( String dinuc : dinucs ) {
            cache.add(new byte[maxPos+1][maxQReported+1]);
        }

        // bind each datum's empirical quality byte into its dinuc table at [pos][qual]
        for ( RecalData datum : manager.getAll() ) {
            //System.out.printf("Adding datum %s%n", datum);
            byte [][] table = cache.get(this.manager.getDinucIndex(datum.dinuc));
            int pos = manager.canonicalPos(datum.pos);
            // a non-zero cell means two data entries mapped to the same key, which should be impossible
            if ( table[pos][datum.qual] != 0 )
                throw new RuntimeException(String.format("Duplicate entry discovered: %s", datum));
            table[pos][datum.qual] = datum.empiricalQualByte(useRawQempirical);
            //System.out.printf("Binding %d %d => %d%n", pos, datum.qual, datum.empiricalQualByte(useRawQempirical));
        }
    }

    /**
     * Look up the recalibrated quality for a base.
     * Returns the reported qual unchanged when no table exists for the dinuc (only tolerated for
     * N-containing dinucs) or when the cycle falls outside the table.
     */
    public byte getNewQual(final String readGroup, byte prevBase, byte base, int cycle, byte qual) {
        int pos = manager.canonicalPos(cycle);
        int index = this.manager.getDinucIndex(prevBase, base);
        byte[][] dataTable = index == -1 ? null : cache.get(index);
        // a missing table is only expected for dinucs containing an ambiguous base
        if ( dataTable == null && prevBase != 'N' && base != 'N' )
            throw new RuntimeException(String.format("Unmapped data table at %s %c%c", readGroup, (char)prevBase, (char)base));
        return dataTable != null && pos < dataTable.length ? dataTable[pos][qual] : qual;
    }
}
/**
* Implements a serial recalibration of the reads using the combinational table. First,
* we perform a positional recalibration, and then a subsequent dinuc correction.
*
* Given the full recalibration table, we perform the following preprocessing steps:
*
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc)
*/
/**
 * Serial recalibration mapping: applies the global quality shift, then the
 * per-reported-quality, per-cycle, and per-dinuc residual shifts in sequence:
 *
 *   Qrecal = Qreported + DeltaQ + DeltaQ(qual) + DeltaQ(pos) + DeltaQ(dinuc)
 *
 * All shift tables are precomputed in the constructor from the full recalibration data.
 */
class SerialRecalMapping implements RecalMapping {
    private double globalDeltaQ = 0.0;                         // global quality-score shift across all data [DeltaQ]
    private double[][] deltaQPosMap, deltaQDinucMap;           // residual shifts indexed by (pos, qual) and (dinucIndex, qual)
    double [] deltaQualMap;                                    // residual shift indexed by reported quality
    RecalData [][] qPosSupports = null, qDinucSupports = null; // optional debug tables recording the datum behind each cell (left null unless enabled)
    CombinatorialRecalMapping combiMap;
    RecalDataManager manager;
    // Debugging filters used by printStateP to limit diagnostic printing.
    String dinucToLookAt = null; // "CC";
    int posToLookAt = -1;
    int qualToLookAt = 25;

    /**
     * Precompute the global, per-quality, per-position, and per-dinuc shift tables.
     *
     * @param manager          source of the recalibration data
     * @param useRawQempirical whether to use the raw (unsmoothed) empirical quality
     * @param dinucs           the set of dinucleotide contexts present in the data
     * @param maxPos           largest machine cycle in the data
     * @param maxQReported     largest reported quality score in the data
     */
    public SerialRecalMapping(RecalDataManager manager, final boolean useRawQempirical,
                              Set<String> dinucs, int maxPos, int maxQReported ) {
        //combiMap = new CombinatorialRecalMapping(manager, dinucs, maxPos, maxQReported );
        this.manager = manager;

        // Global shift: empirical quality of all data pooled together minus the aggregate reported quality.
        {
            RecalData datum = new RecalData(0, 0, manager.readGroup, "**").inc(manager.getAll());
            double aggregrateQreported = RecalData.combinedQreported(manager.getAll());
            globalDeltaQ = datum.empiricalQualDouble(useRawQempirical) - aggregrateQreported;
            //System.out.printf("Global quality score shift is %.2f - %.2f = %.2f%n", datum.empiricalQualDouble(useRawQempirical), aggregrateQreported, globalDeltaQ);
        }

        // Optional diagnostic dump of the raw data, gated by the debug filters above.
        for ( RecalData datum : manager.getAll() ) {
            if ( printStateP(datum) )
                System.out.printf("PrintValue: %s%n", datum);
        }

        // Jaffe-style per-reported-quality shift, relative to the global shift.
        deltaQualMap = new double[maxQReported+1];
        for ( RecalData datum : RecalData.sort(manager.combine(true, false, true)) ) {
            deltaQualMap[datum.qual] = datum.empiricalQualDouble(useRawQempirical) - datum.qual - globalDeltaQ;
            //System.out.printf("%s => %s%n", datum, deltaQualMap[datum.qual]);
        }

        // Per-(cycle, qual) residual shift, with the global and per-qual shifts already removed.
        deltaQPosMap = new double[maxPos+1][maxQReported+1];
        //qPosSupports = new RecalData[maxPos+1][maxQReported+1];
        for ( RecalData datumAtPosQual : manager.combineDinucs() ) {
            double offset = globalDeltaQ + deltaQualMap[datumAtPosQual.qual];
            updateCache(qPosSupports, datumAtPosQual, useRawQempirical, deltaQPosMap, datumAtPosQual.pos, datumAtPosQual.qual, offset);
        }

        // Per-(dinuc, qual) residual shift, likewise relative to the global and per-qual shifts.
        deltaQDinucMap = new double[dinucs.size()+1][maxQReported+1];
        //qDinucSupports = new RecalData[dinucs.size()+1][maxQReported+1];
        for ( RecalData datumAtDinucQual : manager.combineCycles() ) {
            double offset = globalDeltaQ + deltaQualMap[datumAtDinucQual.qual];
            updateCache(qDinucSupports, datumAtDinucQual, useRawQempirical, deltaQDinucMap, datumAtDinucQual.getDinucIndex(), datumAtDinucQual.qual, offset);
        }

        validateTables(maxPos, maxQReported, dinucs.size(), deltaQPosMap, deltaQDinucMap, useRawQempirical, manager.getAll());
    }

    /**
     * Diagnostic pass over the computed tables: prints cells selected by the debug
     * filters and re-derives each datum's recalibrated quality for comparison.
     * Has no effect on the tables themselves.
     */
    private void validateTables(int maxPos, int maxQReported, int nDinucs,
                                double[][] deltaQPosMap, double[][] deltaQDinucMap,
                                final boolean useRawQempirical, List<RecalData> data ) {
        for ( int i = 0; i < maxPos; i++ ) {
            for ( int j = 0; j < maxQReported; j++ ) {
                if ( printStateP(i, null, j) )
                    System.out.printf("Mapping: pos=%d qual=%2d delta=%.2f based on %s%n",
                            i, j, deltaQPosMap[i][j], qPosSupports != null ? qPosSupports[i][j] : null);
            }
        }
        for ( int i = 0; i < nDinucs; i++ ) {
            for ( int j = 0; j < maxQReported; j++ ) {
                String dinuc = RecalData.dinucIndex2bases(i);
                if ( printStateP(0, dinuc, j ) )
                    System.out.printf("Mapping: dinuc=%s qual=%2d delta=%.2f based on %s%n",
                            dinuc, j, deltaQDinucMap[i][j], qDinucSupports != null ? qDinucSupports[i][j] : null);
            }
        }
        for ( RecalData datum : RecalData.sort(data) ) {
            byte newQual = getNewQual(datum.readGroup, (byte)datum.dinuc.charAt(0), (byte)datum.dinuc.charAt(1), datum.pos, (byte)datum.qual);
            if ( printStateP( datum ) ) {
                System.out.printf("Serial mapping %s => %d => %d vs. %d => delta = %d%n",
                        datum, newQual, datum.empiricalQualByte(useRawQempirical), datum.empiricalQualByte(), newQual - datum.empiricalQualByte(useRawQempirical));
            }
        }
    }

    /**
     * Store datum's residual shift (empirical - reported - meanQ) into table[i][j],
     * refusing duplicate writes, and record the supporting datum when requested.
     */
    private void updateCache( RecalData[][] supports, RecalData datum, final boolean useRawQempirical,
                              double[][] table, int i, int j, double meanQ ) {
        if ( table[i][j] != 0 )
            throw new RuntimeException(String.format("Duplicate entry discovered: %s", datum));
        double deltaQ = datum.empiricalQualDouble(useRawQempirical) - datum.qual - meanQ;
        table[i][j] = deltaQ;
        if ( supports != null )
            supports[i][j] = datum;
    }

    // Debug predicate: true when the (cycle, dinuc, qual) triple matches the
    // dinucToLookAt / posToLookAt / qualToLookAt filters configured above.
    private boolean printStateP( int cycle, String dinuc, int qual ) {
        return posToLookAt != -1 &&
                ( cycle == 0 || posToLookAt == 0 || cycle == posToLookAt ) &&
                ( dinucToLookAt == null || dinuc == null || manager.getDinucIndex(dinuc) == -1 || dinucToLookAt.equals(dinuc)) &&
                ( qualToLookAt == 0 || qual == qualToLookAt );
    }

    private boolean printStateP( RecalData datum ) {
        return printStateP(datum.pos, datum.dinuc, datum.qual);
    }

    /**
     * Compute the recalibrated quality for one base by summing the precomputed shifts.
     * A reported quality outside the table (never observed during counting, e.g. seen
     * only at dbSNP sites) is returned unchanged.
     */
    public byte getNewQual(final String readGroup, byte prevBase, byte base, int cycle, byte qual) {
        int pos = manager.canonicalPos(cycle);
        int index = this.manager.getDinucIndex(prevBase, base);

        byte newQualByte = qual;
        if ( qual < deltaQualMap.length ) {
            // it's possible to see a qual not in the recal table -- i.e., we only see Q28 at a dbSNP site
            double deltaQual = deltaQualMap[qual];
            double deltaQPos = pos >= deltaQPosMap.length ? 0.0 : deltaQPosMap[pos][qual]; // == length indicates no data for last base
            double deltaQDinuc = index == -1 ? 0.0 : deltaQDinucMap[index][qual];          // -1 indicates N in prev or current base
            double newQual = qual + globalDeltaQ + deltaQual + deltaQPos + deltaQDinuc;
            newQualByte = QualityUtils.boundQual((int)Math.round(newQual), QualityUtils.MAX_REASONABLE_Q_SCORE);
            //System.out.println(String.format("%s %c%c %d %d => %d + %.2f + %.2f + %.2f + %.2f = %d",
            //        readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQual, deltaQPos, deltaQDinuc, newQualByte));

            // Sanity check that the bounded quality is inside (0, MAX_REASONABLE_Q_SCORE].
            // Bug fix: the original condition used '&&' (with '>='), which no value can
            // satisfy, so the check was dead code; the message also omitted deltaQual.
            if ( newQualByte <= 0 || newQualByte > QualityUtils.MAX_REASONABLE_Q_SCORE )
                throw new RuntimeException(String.format("Illegal base quality score calculated: %s %c%c %d %d => %d + %.2f + %.2f + %.2f + %.2f = %d",
                        readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQual, deltaQPos, deltaQDinuc, newQualByte));
        }
        return newQualByte;
    }
}

View File

@ -1,484 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* This walker is designed to work as the first pass in a two-pass processing step.
* It does a by-locus traversal operating only at sites that are not in dbSNP.
* We assume that all mismatches we see are therefore errors.
* This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
* Since there is a large amount of data one can then calculate an empirical probability of error
* given the particular covariates seen at this site, where p(error) = num mismatches / num observations
* The output file is a CSV list of (several covariate values, num observations, num mismatches, empirical quality score)
* The first lines of the output file give the name of the covariate classes that were used for this calculation.
*
* Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and must be at the start of the list.
* Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
*/
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@WalkerName( "CountCovariatesRefactored" )
//@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality // BUGBUG taken out to match old integration tests
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {

    // ----- command-line arguments -----
    @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
    private Boolean LIST_ONLY = false;
    @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are already added for you.", required=false)
    private String[] COVARIATES = null;
    @Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
    private boolean USE_ORIGINAL_QUALS = false;
    @Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
    private int WINDOW_SIZE = 3;
    @Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
    private String RECAL_FILE = "output.recal_data.csv";
    @Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. For debugging purposes only.")
    private boolean NO_PRINT_HEADER = false;
    @Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
    private boolean VALIDATE_OLD_RECALIBRATOR = false;
    @Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
    private boolean USE_SLX_PLATFORM = false;

    // ----- internal state -----
    private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
    private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
    private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
    private int sizeOfReadDatumHashMap = 0; // Manually tracked entry count used to decide when to flush readDatumHashMap
    private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file
    private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
    private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
    //private final String versionNumber = "2.0.0"; // major version, minor version, and build number

    //---------------------------------------------------------------------------------------------------------------
    //
    // initialize
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Parse the -cov arguments and create a list of covariates to be used here.
     * Based on the covariates' estimates for initial capacity allocate the data hashmap.
     * ReadGroupCovariate and QualityScoreCovariate are mandatory and must lead the list;
     * the data HashMap key order depends on this covariate ordering.
     */
    public void initialize() {
        //logger.info( "CovariateCounterWalker version: " + versionNumber );

        // Get a list of all available covariates
        final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);

        // Print and exit if that's what was requested
        if ( LIST_ONLY ) {
            out.println( "Available covariates:" );
            for( Class<?> covClass : classes ) {
                out.println( covClass.getSimpleName() );
            }
            out.println();
            System.exit( 0 ); // early exit here because user requested it
        }

        // Warn the user if no dbSNP file was specified
        boolean foundDBSNP = false;
        for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
            if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
                foundDBSNP = true;
            }
        }
        if( !foundDBSNP ) {
            Utils.warnUser("This calculation is critically dependent on being able to skip over known variant sites. Are you sure you want to be running without a dbSNP rod specified?");
        }

        // Initialize the requested covariates by parsing the -cov argument
        requestedCovariates = new ArrayList<Covariate>();
        int estimatedCapacity = 1; // capacity is multiplicative so this starts at one
        if( VALIDATE_OLD_RECALIBRATOR ) {
            // Hard-coded covariate set whose ordering mirrors the old recalibrator's output
            requestedCovariates.add( new ReadGroupCovariate() );
            requestedCovariates.add( new CycleCovariate() ); // unfortunately order is different here in order to match the old recalibrator exactly
            requestedCovariates.add( new QualityScoreCovariate() );
            requestedCovariates.add( new DinucCovariate() );
        } else if( COVARIATES != null ) {
            if(COVARIATES[0].equalsIgnoreCase("ALL")) { // the user wants ALL covariates to be used
                requestedCovariates.add( new ReadGroupCovariate() ); // first add the required covariates then add the rest by looping over all implementing classes that were found
                requestedCovariates.add( new QualityScoreCovariate() );
                for( Class<?> covClass : classes ) {
                    try {
                        Covariate covariate = (Covariate)covClass.newInstance();
                        estimatedCapacity *= covariate.estimatedNumberOfBins();
                        // Some covariates need parameters (user supplied command line arguments) passed to them
                        if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
                        if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // these were already added so don't add them again
                            requestedCovariates.add( covariate );
                        }
                    } catch ( InstantiationException e ) {
                        throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
                    } catch ( IllegalAccessException e ) {
                        throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
                    }
                }
            } else { // The user has specified a list of several covariates
                int covNumber = 1;
                for( String requestedCovariateString : COVARIATES ) {
                    boolean foundClass = false;
                    for( Class<?> covClass : classes ) {
                        if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
                            foundClass = true;
                            // Read Group Covariate and Quality Score Covariate are required covariates for the recalibration calculation and must begin the list
                            if( (covNumber == 1 && !requestedCovariateString.equalsIgnoreCase( "ReadGroupCovariate" )) ||
                                (covNumber == 2 && !requestedCovariateString.equalsIgnoreCase( "QualityScoreCovariate" )) ) {
                                throw new StingException("ReadGroupCovariate and QualityScoreCovariate are required covariates for the recalibration calculation and must begin the list" );
                            }
                            covNumber++;
                            try {
                                // Now that we've found a matching class, try to instantiate it
                                Covariate covariate = (Covariate)covClass.newInstance();
                                estimatedCapacity *= covariate.estimatedNumberOfBins();
                                // Some covariates need parameters (user supplied command line arguments) passed to them
                                if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
                                requestedCovariates.add( covariate );
                            } catch ( InstantiationException e ) {
                                throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
                            } catch ( IllegalAccessException e ) {
                                throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
                            }
                        }
                    }
                    if( !foundClass ) {
                        throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." );
                    }
                }
            }
        } else { // No covariates were specified by the user so add the default, required ones
            Utils.warnUser( "Using default set of covariates because none were specified. Using ReadGroupCovariate and QualityScoreCovariate only." );
            requestedCovariates.add( new ReadGroupCovariate() );
            requestedCovariates.add( new QualityScoreCovariate() );
            estimatedCapacity = 300 * 40;
        }

        logger.info( "The covariates being used here: " );
        logger.info( requestedCovariates );

        if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
        dataManager = new RecalDataManager( estimatedCapacity );
        readDatumHashMap = new IdentityHashMap<SAMRecord, ReadHashDatum>();
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // map
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * For each read at this locus get the various covariate values and increment that location in the map based on
     * whether or not the base matches the reference at this particular location
     * @param tracker The reference metadata tracker
     * @param ref The reference context
     * @param context The alignment context
     * @return Returns 1, but this value isn't used in the reduce step
     */
    public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        // pull out anything passed by -B name,type,file that has the name "dbsnp"
        final RODRecordList<ReferenceOrderedDatum> dbsnpRODs = tracker.getTrackData( "dbsnp", null );
        boolean isSNP = false;
        if (dbsnpRODs != null) {
            for( ReferenceOrderedDatum rod : dbsnpRODs ) {
                if( ((Variation)rod).isSNP() ) {
                    isSNP = true; // at least one of the rods says this is a snp site
                    break;
                }
            }
        }

        // Only use data from non-dbsnp sites
        // Assume every mismatch at a non-dbsnp site is indicative of poor quality
        if( !isSNP ) {
            final List<SAMRecord> reads = context.getReads();
            final List<Integer> offsets = context.getOffsets();
            // Locals hoisted out of the loop; values are filled per-read below.
            SAMRecord read;
            int offset;
            String readGroupId;
            byte[] quals;
            byte[] bases;
            byte refBase;
            byte prevBase;
            String platform;
            byte[] colorSpaceQuals;
            ReadHashDatum readDatum;
            boolean isNegStrand;
            int mappingQuality;
            int length;
            final int numReads = reads.size();

            // For each read at this locus
            for( int iii = 0; iii < numReads; iii++ ) {
                read = reads.get(iii);
                offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
                readDatum = readDatumHashMap.get( read );
                if( readDatum == null ) {
                    // If the HashMap of read objects has grown too large then throw out the (mostly stale) reads
                    if( sizeOfReadDatumHashMap > 100000 ) { //BUGBUG: Can I make this number larger?
                        readDatumHashMap.clear();
                        sizeOfReadDatumHashMap = 0;
                    }

                    // This read isn't in the hashMap yet so fill out the datum and add it to the map so that we never have to do the work again
                    quals = read.getBaseQualities();
                    // Check if we need to use the original quality scores instead
                    if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
                        Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
                        if ( obj instanceof String )
                            quals = QualityUtils.fastqToPhred((String)obj);
                        else {
                            throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
                        }
                    }
                    bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
                    isNegStrand = read.getReadNegativeStrandFlag();
                    final SAMReadGroupRecord readGroup = read.getReadGroup();
                    readGroupId = readGroup.getReadGroupId();
                    platform = readGroup.getPlatform();
                    mappingQuality = read.getMappingQuality();
                    length = bases.length;
                    if( USE_SLX_PLATFORM ) {
                        platform = "ILLUMINA";
                    }
                    readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length );
                    readDatumHashMap.put( read, readDatum );
                    sizeOfReadDatumHashMap++;
                }

                if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
                    // skip first and last base because there is no dinuc
                    // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
                    if( offset > 0 ) {
                        if( offset < readDatum.length - 1 ) {
                            // skip if base quality is zero
                            if( readDatum.quals[offset] > 0 ) {
                                refBase = (byte)ref.getBase();
                                prevBase = readDatum.bases[offset-1];
                                // Get the complement base strand if we are a negative strand read
                                // NOTE(review): this takes the following base, not its complement --
                                // presumably a downstream covariate complements it; confirm.
                                if( readDatum.isNegStrand ) {
                                    prevBase = readDatum.bases[offset+1];
                                }

                                // skip if this base or the previous one was an 'N' or etc.
                                if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)(readDatum.bases[offset]) ) ) {
                                    // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
                                    colorSpaceQuals = null;
                                    if( readDatum.platform.equalsIgnoreCase("SOLID") ) {
                                        colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
                                    }
                                    if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
                                    {
                                        // This base finally passed all the checks, so add it to the big hashmap
                                        updateDataFromRead( readDatum, offset, refBase );
                                    }
                                } else {
                                    if( VALIDATE_OLD_RECALIBRATOR ) {
                                        countedBases++; // replicating a small bug in the old recalibrator
                                    }
                                }
                            }
                        } else { // at the last base in the read so we can remove it from our IdentityHashMap
                            readDatumHashMap.remove( read );
                            sizeOfReadDatumHashMap--;
                        }
                    }
                }
            }
            countedSites++;
        } else { // We skipped over the dbSNP site
            skippedSites++;
        }

        return 1; // This value isn't actually used anywhere
    }

    /**
     * Major workhorse routine for this walker.
     * Loop through the list of requested covariates and pick out the value from the read, offset, and reference
     * Using the list of covariate values as a key, pick out the RecalDatum and increment,
     * adding one to the number of observations and potentially one to the number of mismatches
     * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
     * because pulling things out of the SAMRecord is an expensive operation.
     * @param readDatum The ReadHashDatum holding all the important properties of this read
     * @param offset The offset in the read for this locus
     * @param refBase The reference base at this locus
     */
    private void updateDataFromRead(final ReadHashDatum readDatum, final int offset, final byte refBase) {
        List<Comparable> key = new ArrayList<Comparable>();
        // Loop through the list of requested covariates and pick out the value from the read, offset, and reference
        for( Covariate covariate : requestedCovariates ) {
            key.add( covariate.getValue( readDatum, offset ) );
        }

        // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
        RecalDatum datum = dataManager.data.get( key );
        if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
            datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
            if( VALIDATE_OLD_RECALIBRATOR ) {
                dataManager.data.myPut( key, datum );
            } else {
                dataManager.data.put( key, datum );
            }
        }

        // Need the bases to determine whether or not we have a mismatch
        byte base = readDatum.bases[offset];

        // Add one to the number of observations and potentially one to the number of mismatches
        datum.increment( (char)base, (char)refBase ); // dangerous: if you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad
        countedBases++;
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
     * @return returns A PrintStream created from the -recalFile filename
     */
    public PrintStream reduceInit() {
        try {
            return new PrintStream( RECAL_FILE );
        } catch ( FileNotFoundException e ) {
            // NOTE(review): message does not include RECAL_FILE; only the cause carries detail
            throw new RuntimeException( "Couldn't open output file: ", e );
        }
    }

    /**
     * The Reduce method doesn't do anything for this walker.
     * @param value Result of the map. This value is immediately ignored.
     * @param recalTableStream The PrintStream used to output the CSV data
     * @return returns The PrintStream used to output the CSV data
     */
    public PrintStream reduce( Integer value, PrintStream recalTableStream ) {
        return recalTableStream; // nothing to do here
    }

    /**
     * Write out the full data hashmap to disk in CSV format
     * @param recalTableStream The PrintStream to write out to
     */
    public void onTraversalDone( PrintStream recalTableStream ) {
        logger.info( "Writing raw recalibration data..." );
        outputToCSV( recalTableStream );
        logger.info( "...done!" );
        recalTableStream.close();
    }

    /**
     * For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format.
     * In VALIDATE_OLD_RECALIBRATOR mode the header and row ordering replicate the old recalibrator's output exactly.
     * @param recalTableStream The PrintStream to write out to
     */
    private void outputToCSV( final PrintStream recalTableStream ) {
        if( VALIDATE_OLD_RECALIBRATOR ) {
            recalTableStream.printf("# collapsed_pos    false%n");
            recalTableStream.printf("# collapsed_dinuc  false%n");
            recalTableStream.printf("# counted_sites    %d%n", countedSites);
            recalTableStream.printf("# counted_bases    %d%n", countedBases);
            recalTableStream.printf("# skipped_sites    %d%n", skippedSites);
            // NOTE(review): prints Infinity when skippedSites is zero
            recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
            recalTableStream.printf("rg,pos,Qrep,dn,nBases,nMismatches,Qemp%n");
            // For each entry in the data hashmap
            for( Pair<List<? extends Comparable>,RecalDatum> entry : dataManager.data.entrySetSorted4() ) {
                // For each Covariate in the key
                for( Comparable comp : entry.first ) {
                    // Output the Covariate's value
                    recalTableStream.print( comp + "," );
                }
                // Output the RecalDatum entry
                recalTableStream.println( entry.second.outputToCSV() );
            }
        } else {
            if( !NO_PRINT_HEADER ) {
                recalTableStream.printf("# Counted Sites    %d%n", countedSites);
                recalTableStream.printf("# Counted Bases    %d%n", countedBases);
                recalTableStream.printf("# Skipped Sites    %d%n", skippedSites);
                recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
                for( Covariate cov : requestedCovariates ) {
                    // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
                    recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
                }
            }
            // For each entry in the data hashmap
            for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
                // For each Covariate in the key
                for( Comparable comp : entry.getKey() ) {
                    // Output the Covariate's value
                    if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
                    recalTableStream.print( comp + "," );
                }
                // Output the RecalDatum entry
                recalTableStream.println( entry.getValue().outputToCSV() );
            }
        }
    }
}

View File

@ -1,204 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 6, 2009
*
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
*/
public class RecalDataManager {
public NHashMap<RecalDatum> data; // the full dataset
private NHashMap<RecalDatum> dataCollapsedReadGroup; // table where everything except read group has been collapsed
private NHashMap<RecalDatum> dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // tables where everything except read group, quality score, and given covariate has been collapsed
public NHashMap<Double> dataSumExpectedErrors; // table used to calculate the overall aggregate quality score in which everything except read group is collapsed
private NHashMap<Double> dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
private NHashMap<Double> dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
private ArrayList<NHashMap<Double>> dataCollapsedByCovariateDouble; // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // the tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // the tag that holds the color space quality scores for SOLID bams
RecalDataManager() {
data = new NHashMap<RecalDatum>();
}
RecalDataManager( final int estimatedCapacity, final boolean createCollapsedTables, final int numCovariates ) {
if( createCollapsedTables ) { // initialize all the collapsed tables
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
dataCollapsedQualityScore = new NHashMap<RecalDatum>();
dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
}
dataSumExpectedErrors = new NHashMap<Double>();
} else {
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f);
}
}
    /**
     * Constructor that allocates only the full data table, pre-sized to the given capacity.
     *
     * @param estimatedCapacity initial capacity hint for the backing HashMap
     */
    RecalDataManager( final int estimatedCapacity ) {
        data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f ); // second arg is the 'loading factor',
                                                                    // a number to monkey around with when optimizing performace of the HashMap
    }
/**
* Add the given mapping to all of the collapsed hash tables
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
*/
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum ) {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with just the read group
RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
// create dataSumExpectedErrors, the table used to calculate the overall aggregate quality score in which everything except read group is collapsed
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with just the read group
Double sumExpectedErrors = dataSumExpectedErrors.get( newKey );
if( sumExpectedErrors == null ) {
dataSumExpectedErrors.put( newKey, 0.0 );
} else {
dataSumExpectedErrors.remove( newKey );
sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * fullDatum.getNumObservations();
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
}
newKey = new ArrayList<Comparable>();
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score
collapsedDatum = dataCollapsedQualityScore.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedQualityScore.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { // readGroup and QualityScore aren't counted
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score ...
newKey.add( key.get(iii + 2) ); // and the given covariate
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
if( collapsedDatum == null ) {
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.increment(fullDatum);
}
}
}
/**
* Loop over all the collapsed tables and turn the recalDatums found there into an empricial quality score
* that will be used in the sequential calculation in TableRecalibrationWalker
* @param numCovariates The number of covariates you have determines the number of tables to create
* @param smoothing The smoothing paramter that goes into empirical quality score calculation
*/
public final void generateEmpiricalQualities( final int numCovariates, final int smoothing ) {
dataCollapsedReadGroupDouble = new NHashMap<Double>();
dataCollapsedQualityScoreDouble = new NHashMap<Double>();
dataCollapsedByCovariateDouble = new ArrayList<NHashMap<Double>>();
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
dataCollapsedByCovariateDouble.add( new NHashMap<Double>() );
}
// Hash the empirical quality scores so we don't have to call Math.log at every base for every read
// Looping over the entrySet is really expensive but worth it
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedReadGroup.entrySet() ) {
dataCollapsedReadGroupDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedQualityScore.entrySet() ) {
dataCollapsedQualityScoreDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedByCovariate.get(iii).entrySet() ) {
dataCollapsedByCovariateDouble.get(iii).put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
}
}
dataCollapsedQualityScore.clear();
dataCollapsedByCovariate.clear();
dataCollapsedQualityScore = null; // will never need this again
dataCollapsedByCovariate = null; // will never need this again
if( data!=null ) {
data.clear();
data = null; // will never need this again
}
}
/**
* Get the appropriate collapsed table out of the set of all the tables held by this Object
* @param covariate Which covariate indexes the desired collapsed HashMap
* @return The desired collapsed HashMap
*/
public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
if( covariate == 0) {
return dataCollapsedReadGroup; // table where everything except read group has been collapsed
} else if( covariate == 1 ) {
return dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
} else {
return dataCollapsedByCovariate.get( covariate - 2 ); // table where everything except read group, quality score, and given covariate has been collapsed
}
}
/**
* Get the appropriate collapsed table of emprical quality out of the set of all the tables held by this Object
* @param covariate Which covariate indexes the desired collapsed NHashMap<Double>
* @return The desired collapsed NHashMap<Double>
*/
public final NHashMap<Double> getCollapsedDoubleTable( final int covariate ) {
if( covariate == 0) {
return dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
} else if( covariate == 1 ) {
return dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
} else {
return dataCollapsedByCovariateDouble.get( covariate - 2 ); // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
}
}
}

View File

@ -1,454 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.io.File;
import java.io.FileNotFoundException;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal.
* For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
* Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read.
* This walker then outputs a new bam file with these updated (recalibrated) reads.
*
* Note: This walker expects as input the recalibration table file generated previously by CovariateCounterWalker.
* Note: This walker is designed to be used in conjunction with CovariateCounterWalker.
*/
@WalkerName("TableRecalibrationRefactored")
@Requires({ DataSource.READS, DataSource.REFERENCE }) // This walker requires -I input.bam, it also requires -R reference.fasta, but by not saying @requires REFERENCE_BASES I'm telling the
                                                      // GATK to not actually spend time giving me the refBase array since I don't use it
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {

    // Command line arguments controlling input table, output BAM, and recalibration behavior
    @Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
    private String RECAL_FILE = null;
    @Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=false)
    private SAMFileWriter OUTPUT_BAM = null;
    @Argument(fullName="preserve_qscores_less_than", shortName="pQ",
            doc="If provided, bases with quality scores less than this threshold won't be recalibrated. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
    private int PRESERVE_QSCORES_LESS_THAN = 5;
    @Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
    private boolean USE_ORIGINAL_QUALS = false;
    @Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
    private int WINDOW_SIZE = 3;
    @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
    private int SMOOTHING = 1;
    @Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
    private boolean VALIDATE_OLD_RECALIBRATOR = false;
    @Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
    private boolean USE_SLX_PLATFORM = false;

    //public enum RecalibrationMode {
    //    COMBINATORIAL,
    //    SEQUENTIAL,
    //    ERROR
    //}
    //@Argument(fullName="recalibrationMode", shortName="mode", doc="Which calculation to use when recalibrating, default is SEQUENTIAL", required=false)
    private String MODE_STRING = "SEQUENTIAL"; // only SEQUENTIAL is exercised below
    //public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: do we need to support the other modes?

    private RecalDataManager dataManager;                // holds the collapsed recalibration tables parsed from RECAL_FILE
    private ArrayList<Covariate> requestedCovariates;    // covariate classes read back from the table header lines
    private ArrayList<Comparable> fullCovariateKey;      // the list that will be used over and over again to hold the full set of covariate values
    private ArrayList<Comparable> collapsedTableKey;     // the key that will be used over and over again to query the collapsed tables
    // Patterns recognizing the three kinds of lines in the recalibration table file
    private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
    private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
    private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
    //private final String versionNumber = "2.0.0"; // major version, minor version, and build number

    //---------------------------------------------------------------------------------------------------------------
    //
    // initialize
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Read in the recalibration table input file.
     * Parse the list of covariate classes used during CovariateCounterWalker.
     * Parse the CSV data and populate the hashmap.
     */
    public void initialize() {

        //logger.info( "TableRecalibrationWalker version: " + versionNumber );
        // Get a list of all available covariates
        List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
        int lineNumber = 0;
        boolean foundAllCovariates = false;
        int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one

        // Warn the user if a dbSNP file was specified since it isn't being used here
        boolean foundDBSNP = false;
        for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
            if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
                foundDBSNP = true;
            }
        }
        if( foundDBSNP ) {
            Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
        }

        // Reusable key lists, cleared and refilled per base / per lookup
        fullCovariateKey = new ArrayList<Comparable>();
        collapsedTableKey = new ArrayList<Comparable>();

        // Read in the covariates that were used from the input file
        requestedCovariates = new ArrayList<Covariate>();

        // Read in the data from the csv file and populate the map
        logger.info( "Reading in the data from input file..." );

        try {
            for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
                lineNumber++;
                if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
                    ; // skip over the comment lines, (which start with '#')
                }
                else if( COVARIATE_PATTERN.matcher(line).matches() ) { // the line string is either specifying a covariate or is giving csv data
                    if( foundAllCovariates ) {
                        throw new StingException( "Malformed input recalibration file. Found covariate names intermingled with data. " + RECAL_FILE );
                    } else { // found another covariate in input file
                        boolean foundClass = false;
                        for( Class<?> covClass : classes ) {
                            if( line.equalsIgnoreCase( "@!" + covClass.getSimpleName() ) ) { // the "@!" was added by CovariateCounterWalker as a code to recognize covariate class names
                                foundClass = true;
                                try {
                                    Covariate covariate = (Covariate)covClass.newInstance();
                                    // some covariates need parameters (user supplied command line arguments) passed to them
                                    if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
                                    requestedCovariates.add( covariate );
                                    estimatedCapacity *= covariate.estimatedNumberOfBins();
                                } catch ( InstantiationException e ) {
                                    throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
                                } catch ( IllegalAccessException e ) {
                                    throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) );
                                }
                            }
                        }
                        if( !foundClass ) {
                            throw new StingException( "Malformed input recalibration file. The requested covariate type (" + line + ") isn't a valid covariate option." );
                        }
                    }

                } else { // found some data
                    if( !foundAllCovariates ) {
                        if( VALIDATE_OLD_RECALIBRATOR ) {
                            // old recalibrator tables carry no covariate header; assume its fixed covariate set
                            requestedCovariates.add( new ReadGroupCovariate() );
                            requestedCovariates.add( new QualityScoreCovariate() );
                            requestedCovariates.add( new CycleCovariate() );
                            requestedCovariates.add( new DinucCovariate() );
                        }
                        foundAllCovariates = true;
                        if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
                        final boolean createCollapsedTables = true;

                        // Initialize the data hashMaps
                        dataManager = new RecalDataManager( estimatedCapacity, createCollapsedTables, requestedCovariates.size() );
                    }
                    addCSVData(line); // parse the line and add the data to the HashMap
                }
            }

        } catch ( FileNotFoundException e ) {
            Utils.scareUser("Can not find input file: " + RECAL_FILE);
        } catch ( NumberFormatException e ) {
            throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
        }
        logger.info( "...done!" );

        logger.info( "The covariates being used here: " );
        logger.info( requestedCovariates );

        // Create the collapsed tables that are used in the sequential calculation
        if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
            logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
            dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
            logger.info( "...done!" );
        }
    }

    /**
     * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
     * @param line A line of CSV data read from the recalibration table data file
     */
    private void addCSVData(String line) {
        String[] vals = line.split(",");
        // The first requestedCovariates.size() fields are covariate values; the final two are the counts
        ArrayList<Comparable> key = new ArrayList<Comparable>();
        Covariate cov;
        int iii;
        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
            cov = requestedCovariates.get( iii );
            if( VALIDATE_OLD_RECALIBRATOR ) {
                if( iii == 1 ) { // order is different in the old recalibrator
                    key.add( cov.getValue( vals[2] ) );
                } else if ( iii == 2 ) {
                    key.add( cov.getValue( vals[1] ) );
                } else {
                    key.add( cov.getValue( vals[iii] ) );
                }
            } else {
                key.add( cov.getValue( vals[iii] ) );
            }
        }
        // after the loop iii points at the first count column: observations then mismatches
        RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ) );
        dataManager.addToAllTables( key, datum );
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // map
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
     * @param refBases References bases over the length of the read
     * @param read The read to be recalibrated
     * @return The read with quality scores replaced
     */
    public SAMRecord map( char[] refBases, SAMRecord read ) {

        // WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
        //   This is done in order to speed up the code

        byte[] originalQuals = read.getBaseQualities();
        // Check if we need to use the original quality scores instead
        if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
            Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
            if ( obj instanceof String )
                originalQuals = QualityUtils.fastqToPhred((String)obj);
            else {
                throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
            }
        }
        byte[] recalQuals = originalQuals.clone();

        // These calls are all expensive so only do them once for each read
        final SAMReadGroupRecord readGroup = read.getReadGroup();
        final String readGroupId = readGroup.getReadGroupId();
        String platform = readGroup.getPlatform();
        final boolean isNegStrand = read.getReadNegativeStrandFlag();
        if( USE_SLX_PLATFORM ) {
            platform = "ILLUMINA";
        }
        byte[] bases = read.getReadBases();
        // skip the first base of a forward read / last base of a reverse read since the dinuc covariate has no previous base there
        int startPos = 1;
        int stopPos = bases.length;
        if( isNegStrand ) { // DinucCovariate is responsible for getting the complement base if needed
            startPos = 0;
            stopPos = bases.length - 1;
        }
        ReadHashDatum readDatum = new ReadHashDatum( readGroupId, platform, originalQuals, bases, isNegStrand, read.getMappingQuality(), bases.length );

        // For each base in the read
        for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read

            // Get the covariate values which make up the key
            for( Covariate covariate : requestedCovariates ) {
                fullCovariateKey.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
            }

            recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
            fullCovariateKey.clear();
        }

        preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low

        // SOLID bams insert the reference base into the read if the color space quality is zero, so don't change their base quality scores
        if( platform.equalsIgnoreCase("SOLID") ) {
            // NOTE(review): if the CQ tag is absent, fastqToPhred may be handed null before the null check below — confirm fastqToPhred tolerates null
            byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
            if(colorSpaceQuals != null) { preserveBadColorSpaceQualities_SOLID( originalQuals, recalQuals, colorSpaceQuals ); }
        }

        read.setBaseQualities(recalQuals); // overwrite old qualities with new recalibrated qualities
        if ( read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if the tag isn't already taken in the read
            read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(originalQuals));
        }

        return read;
    }

    /**
     * Implements a serial recalibration of the reads using the combinational table.
     * First, we perform a positional recalibration, and then a subsequent dinuc correction.
     *
     * Given the full recalibration table, we perform the following preprocessing steps:
     *
     *   - calculate the global quality score shift across all data [DeltaQ]
     *   - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
     *      -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
     *   - The final shift equation is:
     *
     *      Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
     * @param key The list of Comparables that were calculated from the covariates
     * @return A recalibrated quality score as a byte
     */
    private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {

        String readGroupKeyElement = key.get(0).toString();
        int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
        byte qualFromRead = (byte)qualityScoreKeyElement;

        // The global quality shift (over the read group only)
        collapsedTableKey.add( readGroupKeyElement );
        RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get(collapsedTableKey);
        Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get(collapsedTableKey);
        double globalDeltaQ = 0.0;
        double aggregrateQreported = 0.0;
        if( globalDeltaQDatum != null ) {
            // aggregate reported quality = phred-scale of (sum of expected errors / total observations)
            aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get(collapsedTableKey) / ((double) globalDeltaQDatum.getNumObservations()) );
            globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported;
        }

        // The shift in quality between reported and empirical
        collapsedTableKey.add( qualityScoreKeyElement );
        Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get(collapsedTableKey);
        double deltaQReported = 0.0;
        if( deltaQReportedEmpirical != null ) {
            deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
        }

        // The shift in quality due to each covariate by itself in turn
        double deltaQCovariates = 0.0;
        Double deltaQCovariateEmpirical;
        double deltaQPos = 0.0;   // only tracked for the old-recalibrator validation printouts
        double deltaQDinuc = 0.0; // only tracked for the old-recalibrator validation printouts
        for( int iii = 2; iii < key.size(); iii++ ) {
            collapsedTableKey.add( key.get(iii) ); // the given covariate
            deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get(collapsedTableKey);
            if( deltaQCovariateEmpirical != null ) {
                deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
                if( VALIDATE_OLD_RECALIBRATOR ) {
                    if(iii==2) { deltaQPos = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); } // BUGBUG: only here to validate against the old recalibrator
                    if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
                }
            }
            collapsedTableKey.remove( 2 ); // this new covariate is always added in at position 2 in the collapsedTableKey list
        }

        double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
        byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );

        // Verbose printouts used to validate with old recalibrator
        //if(key.contains(null)) {
        //    System.out.println( key  + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
        //                 qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
        //}
        //else {
        //    System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
        //                 key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
        //}

        collapsedTableKey.clear();
        return newQualityByte;
    }

    /**
     * Loop of the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
     * @param originalQuals The list of original base quality scores
     * @param recalQuals A list of the new recalibrated quality scores
     */
    private void preserveQScores( final byte[] originalQuals, byte[] recalQuals ) {
        for( int iii = 0; iii < recalQuals.length; iii++ ) {
            if ( originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN ) {
                recalQuals[iii] = originalQuals[iii];
            }
        }
    }

    /**
     * Loop of the list of qualities and overwrite the newly recalibrated score to be the original score if the color space quality is zero
     * @param originalQuals The list of original base quality scores
     * @param recalQuals A list of the new recalibrated quality scores
     * @param colorSpaceQuals The list of color space quality scores for this read
     */
    private void preserveBadColorSpaceQualities_SOLID( final byte[] originalQuals, byte[] recalQuals, final byte[] colorSpaceQuals ) {
        for( int iii = 0; iii < recalQuals.length; iii++ ) {
            if ( colorSpaceQuals[iii] <= 0 ) { //BUGBUG: This isn't exactly correct yet
                recalQuals[iii] = originalQuals[iii];
            }
        }
    }

    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
    //
    //---------------------------------------------------------------------------------------------------------------

    /**
     * Start the reduce with a handle to the output bam file (no other setup is needed)
     * @return A FileWriter pointing to a new bam file
     */
    public SAMFileWriter reduceInit() {
        return OUTPUT_BAM;
    }

    /**
     * Output each read to disk
     * @param read The read to output
     * @param output The FileWriter to write the read to
     * @return The FileWriter
     */
    public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
        if ( output != null ) {
            output.addAlignment(read);
        } else {
            out.println(read.format()); // no output BAM was supplied; dump the read as text instead
        }
        return output;
    }
}

View File

@ -1,184 +0,0 @@
// our package
package org.broadinstitute.sting.gatk.walkers.recalibration;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
// the imports for unit testing.
import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.junit.BeforeClass;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.util.HashSet;
import java.io.FileNotFoundException;
import java.io.File;
/**
* Basic unit test for RecalData
*/
public class CovariateCounterTest extends BaseTest {
    // Test fixture: two read groups, three artificial reads built from these bases/quals
    String readGroup1 = "rg1";
    String readGroup2 = "rg2";
    Set<String> readGroups = new HashSet<String>();
    SAMFileHeader header;
    SAMRecord read1, read2, read3;
    byte bases1[] = {'a', 't', 'c', 'g', 'a'};
    byte quals1[] = {1, 2, 3, 4, 5};
    byte quals3[] = {1, 2, 5, 5, 5}; // same bases as read1 but diverging quals from index 2 on
    byte bases2[] = {'t', 'c', 'g', 'a', 't'};
    byte quals2[] = {2, 2, 4, 5, 2};
/*
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc ) {
public Set<String> getReadGroups() {
public boolean isCollapseDinuc() {
public boolean isCollapsePos() {
public int getNReadGroups() {
private RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
public List<RecalData> getRecalData(String readGroup) {
public int updateDataFromRead( String rg, SAMRecord read, int offset, char ref ) {
*/
    /**
     * The fasta, for comparison.
     */
    protected static IndexedFastaSequenceFile sequenceFile = null;

    // The counter under test, rebuilt before each test case
    CovariateCounter c;
    /**
     * Initialize the fasta once for the whole class.
     * NOTE(review): depends on a hard-coded reference path under seqLocation being present on the host.
     */
    @BeforeClass
    public static void initialize() throws FileNotFoundException {
        sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
        GenomeLocParser.setupRefContigOrdering(sequenceFile);
    }
    /**
     * Per-test setup: build a fresh header with two read groups, a fresh counter,
     * and the three artificial reads. readGroups must be populated before the
     * CovariateCounter is constructed since the counter snapshots that set.
     */
    @Before
    public void initializeBefore() {
        header = ArtificialSAMUtils.createArtificialSamHeader(2,0,247249719);
        readGroups.addAll(Arrays.asList(readGroup1, readGroup2));
        ArtificialSAMUtils.createDefaultReadGroup( header, readGroup1, "sample1" );
        ArtificialSAMUtils.createDefaultReadGroup( header, readGroup2, "sample2" );
        c = new CovariateCounter( readGroups, false, false, false );
        read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1, bases1, quals1);
        read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",1,1, bases2, quals2);
        read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",1,1, bases1, quals3);
    }
@Test
public void testCovariateCounterSetup() {
Assert.assertEquals("Number of read groups is wrong", c.getNReadGroups(), 2);
Assert.assertEquals("Read group identities are wrong", c.getReadGroups(), readGroups);
Assert.assertEquals("Incorrectly collapsed counter", c.isCollapseDinuc(), false);
Assert.assertEquals("Incorrectly collapsed counter", c.isCollapsePos(), false);
}
    /**
     * Feed a single read into the counter and verify every base (past the first,
     * which has no dinuc) lands in exactly one recal bin with the expected
     * dinuc/qual identity and counts of N=1, B=0.
     */
    @Test
    public void testOneRead() {
        for ( int i = 1; i < read1.getReadBases().length; i++ )
            c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false);
        c.printState();

        // position 0 was never updated, so its bin must hold zero observations
        Assert.assertEquals("Incorrect mapping to recal bin", c.getRecalData(readGroup1, 0, quals1[0], 'A', (char)bases1[0]).N, 0);
        for ( int i = 1; i < bases1.length; i++ ) {
            RecalData datum = c.getRecalData(readGroup1, i, quals1[i], (char)bases1[i-1], (char)bases1[i]);
            System.out.printf("%s%n", datum);
            Assert.assertNotNull("Incorrect mapping to recal bin", datum);
            Assert.assertEquals("Bad mismatch count", datum.B, 0);
            Assert.assertEquals("Bad base count", datum.N, 1);
            Assert.assertEquals("Prevbase is bad", datum.dinuc.charAt(0), bases1[i-1]);
            Assert.assertEquals("Base is bad", datum.dinuc.charAt(1), bases1[i]);
            Assert.assertEquals("Qual is bad", datum.qual, quals1[i]);
        }
    }
    /**
     * Feed two different reads into two different read groups and verify read1's
     * bins in readGroup1 are untouched by read2's data (each bin still N=1, B=0).
     */
    @Test
    public void testTwoReads() {
        for ( int i = 1; i < read1.getReadBases().length; i++ )
            c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false);
        for ( int i = 1; i < read2.getReadBases().length; i++ )
            c.updateDataFromRead(readGroup2, read2, i, (char)read2.getReadBases()[i], false);
        c.printState();

        // position 0 was never updated, so its bin must hold zero observations
        Assert.assertEquals("Incorrect mapping to recal bin", c.getRecalData(readGroup1, 0, quals1[0], 'A', (char)bases1[0]).N, 0);
        for ( int i = 1; i < bases1.length; i++ ) {
            RecalData datum = c.getRecalData(readGroup1, i, quals1[i], (char)bases1[i-1], (char)bases1[i]);
            System.out.printf("%s%n", datum);
            Assert.assertNotNull("Incorrect mapping to recal bin", datum);
            Assert.assertEquals("Bad mismatch count", datum.B, 0);
            Assert.assertEquals("Bad base count", datum.N, 1);
            Assert.assertEquals("Prevbase is bad", datum.dinuc.charAt(0), bases1[i-1]);
            Assert.assertEquals("Base is bad", datum.dinuc.charAt(1), bases1[i]);
            Assert.assertEquals("Qual is bad", datum.qual, quals1[i]);
        }
    }
@Test
public void testTwoReadsSameGroup() {
    // Add read1 to the counter twice under the same read group: every
    // covariate bin it touches should then hold exactly two observations.
    for ( int i = 1; i < read1.getReadBases().length; i++ )
        c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false);
    // BUGFIX: this second pass was previously bounded by read2's length while
    // feeding read1's bases — an out-of-bounds access if read2 is longer than
    // read1, or a short count (failing the N==2 assertion) if it is shorter.
    // Both passes over read1 must be bounded by read1's own length.
    for ( int i = 1; i < read1.getReadBases().length; i++ )
        c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false);
    c.printState();
    for ( int i = 1; i < bases1.length; i++ ) {
        RecalData datum = c.getRecalData(readGroup1, i, quals1[i], (char)bases1[i-1], (char)bases1[i]);
        System.out.printf("%s%n", datum);
        Assert.assertNotNull("Incorrect mapping to recal bin", datum);
        Assert.assertEquals("Bad mismatch count", datum.B, 0);
        Assert.assertEquals("Bad base count", datum.N, 2);
        Assert.assertEquals("Prevbase is bad", datum.dinuc.charAt(0), bases1[i-1]);
        Assert.assertEquals("Base is bad", datum.dinuc.charAt(1), bases1[i]);
        Assert.assertEquals("Qual is bad", datum.qual, quals1[i]);
    }
}
@Test
public void testTwoReadsSameGroupNotIdentical() {
    // read1 and read3 share a read group. Where their quals agree the two
    // reads presumably land in the same bin (N == 2); where they differ each
    // read gets its own qual bin (N == 1) — relies on read3 sharing read1's
    // base sequence (fixture defined elsewhere in this file).
    for ( int pos = 1; pos < read1.getReadBases().length; pos++ )
        c.updateDataFromRead(readGroup1, read1, pos, (char)read1.getReadBases()[pos], false);
    for ( int pos = 1; pos < read3.getReadBases().length; pos++ )
        c.updateDataFromRead(readGroup1, read3, pos, (char)read3.getReadBases()[pos], false);
    c.printState();
    for ( int pos = 1; pos < bases1.length; pos++ ) {
        final RecalData bin = c.getRecalData(readGroup1, pos, quals1[pos], (char)bases1[pos-1], (char)bases1[pos]);
        System.out.printf("%s%n", bin);
        Assert.assertNotNull("Incorrect mapping to recal bin", bin);
        Assert.assertEquals("Bad mismatch count", bin.B, 0);
        Assert.assertEquals("Bad base count", bin.N, quals1[pos] == quals3[pos] ? 2 : 1);
        Assert.assertEquals("Prevbase is bad", bin.dinuc.charAt(0), bases1[pos-1]);
        Assert.assertEquals("Base is bad", bin.dinuc.charAt(1), bases1[pos]);
        Assert.assertEquals("Qual is bad", bin.qual, quals1[pos]);
    }
}
@Test (expected = RuntimeException.class)
public void testBadReadOffset() {
    // Updating at offset 0 must throw — presumably because there is no
    // preceding base to form the dinucleotide covariate (TODO confirm in
    // CovariateCounter.updateDataFromRead).
    final byte[] seq   = {'a', 't', 'c', 'g', 'a'};
    final byte[] quals = {1, 2, 3, 4, 5};
    final SAMRecord badRead = ArtificialSAMUtils.createArtificialRead(header, "read1", 1, 1, seq, quals);
    c.updateDataFromRead(readGroup1, badRead, 0, (char)seq[0], false);
}
}

View File

@ -1,183 +0,0 @@
// our package
package org.broadinstitute.sting.gatk.walkers.recalibration;
// the imports for unit testing.
import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.HashSet;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Unit tests for RecalData: construction, mismatch counting, empirical
 * quality computation, CSV (de)serialization, and dinucleotide indexing.
 */
public class RecalDataTest extends BaseTest {
    RecalData datum, starDatum;   // ordinary bin ("AA") and collapsed wildcard bin ("**")

    /**
     * Each test starts from a fresh bin at position 2, reported qual 3,
     * read group "0", holding one observation (N=1) that was a mismatch (B=1).
     */
    @Before
    public void before() {
        datum = new RecalData(2, 3, "0", "AA");
        datum.B = datum.N = 1;
        starDatum = new RecalData(2, 3, "0", "**");
        starDatum.B = starDatum.N = 1;
    }

    @Test
    public void testBasic() {
        // FIX: this previously logged "Executing testIsBetween", a copy-paste
        // from an unrelated test.
        logger.warn("Executing testBasic");
        Assert.assertTrue(datum.N == 1);
        Assert.assertTrue(datum.B == 1);
        Assert.assertTrue(datum.pos == 2);
        Assert.assertTrue(datum.qual == 3);
        Assert.assertTrue(datum.readGroup.equals("0"));
        Assert.assertTrue(datum.dinuc.equals("AA"));
        Assert.assertTrue(starDatum.N == 1);
        Assert.assertTrue(starDatum.B == 1);
        Assert.assertTrue(starDatum.pos == 2);
        Assert.assertTrue(starDatum.qual == 3);
        Assert.assertTrue(starDatum.readGroup.equals("0"));
        Assert.assertTrue(starDatum.dinuc.equals("**"));
    }

    @Test
    public void testInc() {
        logger.warn("Executing testInc");
        // inc(long, long): add raw observation/mismatch counts directly.
        datum.inc(1L, 0L);
        Assert.assertTrue(datum.N == 2);
        Assert.assertTrue(datum.B == 1);
        // inc(char, char): equal bases count an observation only ...
        datum.inc('A', 'A');
        Assert.assertTrue(datum.N == 3);
        Assert.assertTrue(datum.B == 1);
        // ... while differing bases also count a mismatch.
        datum.inc('A', 'C');
        Assert.assertTrue(datum.N == 4);
        Assert.assertTrue(datum.B == 2);
    }

    @Test
    public void testEmpQual() {
        logger.warn("Executing testEmpQual");
        // Empirical quality is the phred-scaled mismatch rate -10*log10(B/N).
        datum.B = 1;
        datum.N = 1;
        Assert.assertEquals(datum.empiricalQualDouble(), 0.0, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 1);
        datum.B = 1;
        datum.N = 2;
        Assert.assertEquals(datum.empiricalQualDouble(), 3.0103, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 3);
        datum.B = 1;
        datum.N = 3;
        Assert.assertEquals(datum.empiricalQualDouble(), 4.771213, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 5);
        // FIX: this step previously read "datum.B = 1; datum.B = 2;" — a
        // double assignment to B that silently relied on N still being 3 from
        // the step above. The expected value 1.760913 == -10*log10(2/3)
        // confirms B=2, N=3 was intended; make both assignments explicit.
        datum.B = 2;
        datum.N = 3;
        Assert.assertEquals(datum.empiricalQualDouble(), 1.760913, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 2);
        datum.B = 1;
        datum.N = 10;
        Assert.assertEquals(datum.empiricalQualDouble(), 10.0, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 10);
        datum.B = 1;
        datum.N = 100;
        Assert.assertEquals(datum.empiricalQualDouble(), 20.0, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 20);
        datum.B = 1;
        datum.N = 1000;
        Assert.assertEquals(datum.empiricalQualDouble(), 30.0, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), 30);
        // Zero mismatches: the estimate is capped at MAX_REASONABLE_Q_SCORE.
        datum.B = 0;
        datum.N = 1000;
        Assert.assertEquals(datum.empiricalQualDouble(), QualityUtils.MAX_REASONABLE_Q_SCORE, 1e-5);
        Assert.assertEquals(datum.empiricalQualByte(), QualityUtils.MAX_REASONABLE_Q_SCORE);
    }

    // FIX: this method was missing @Test and was never executed by JUnit 4.
    @Test
    public void testtoCSVString() {
        logger.warn("Executing testtoCSVString");
        // toCSVString(true) collapses the position column to "*".
        Assert.assertEquals(datum.toCSVString(false), "0,AA,3,2,1,1,0");
        Assert.assertEquals(datum.toCSVString(true), "0,AA,3,*,1,1,0");
    }

    // FIX: this method was missing @Test and was never executed by JUnit 4.
    @Test
    public void testFromCSVString() {
        logger.warn("Executing testFromCSVString");
        // Round-trip through fromCSVString must reproduce the serialized form,
        // for both concrete and collapsed ("*" position, "**" dinuc) rows.
        Assert.assertEquals(RecalData.fromCSVString("0,AA,3,2,1,1,0").toCSVString(false), datum.toCSVString(false));
        Assert.assertEquals(RecalData.fromCSVString("0,AA,3,*,1,1,0").toCSVString(false), datum.toCSVString(true));
        Assert.assertEquals(RecalData.fromCSVString("0,**,3,2,1,1,0").toCSVString(false), starDatum.toCSVString(false));
        Assert.assertEquals(RecalData.fromCSVString("0,**,3,*,1,1,0").toCSVString(false), starDatum.toCSVString(true));
    }

    // FIX: this method was missing @Test and was never executed by JUnit 4.
    @Test
    public void testDinucIndex() {
        logger.warn("Executing testDinucIndex");
        HashSet<Integer> indices = new HashSet<Integer>();
        HashSet<Byte> unknownBytes = new HashSet<Byte>();
        byte bases[] = {'A', 'C', 'G', 'T', '*', 'N'};
        unknownBytes.add((byte)'*');
        unknownBytes.add((byte)'N');
        // Every ordered pair of bases must index consistently via the String
        // and the two-byte overloads; pairs containing '*' or 'N' map to -1,
        // and valid indices round-trip through dinucIndex2bases.
        for ( int i = 0; i < bases.length; i++ ) {
            for ( int j = 0; j < bases.length; j++ ) {
                byte[] bp = {bases[i], bases[j]};
                String s = new String(bp);
                int index = RecalData.dinucIndex(s);
                indices.add(index);
                Assert.assertEquals(index, RecalData.dinucIndex(bases[i], bases[j]));
                if ( index != -1 ) {
                    Assert.assertEquals(RecalData.dinucIndex2bases(index), s);
                } else {
                    Assert.assertTrue(unknownBytes.contains(bp[0]) || unknownBytes.contains(bp[1]) );
                }
            }
        }
        // 16 valid dinucs plus the shared -1 sentinel.
        Assert.assertEquals(indices.size(), 17);
    }
}

View File

@ -29,7 +29,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -T CountCovariates" +
" -I " + bam +
" -L 1:10,000,000-11,000,000" +
" --params %s",
" --validate_old_recalibrator" +
" --use_slx_platform" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
List<File> result = executeTest("testCountCovariates1", spec).getFirst();
@ -56,8 +58,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -T TableRecalibration" +
" -I " + bam +
" -L 1:10,000,000-20,000,000" +
" --outputBam %s" +
" --params " + paramsFile,
" --validate_old_recalibrator" +
" --use_slx_platform" +
" -outputBam %s" +
" -recalFile " + paramsFile,
1, // just one output file
Arrays.asList(md5));
executeTest("testTableRecalibrator1", spec);

View File

@ -1,71 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Arrays;
import java.util.List;
import java.io.File;
/**
 * Integration tests for the refactored recalibration walkers
 * (CountCovariatesRefactored / TableRecalibrationRefactored), checked
 * against known-good output md5s per input bam.
 */
public class RefactoredRecalibrationWalkersIntegrationTest extends WalkerTest {
    // Recal param files produced by testCountCovariatesR, keyed by input bam
    // and consumed by testTableRecalibratorR (assumes that test runs second).
    static HashMap<String, String> paramsFiles = new HashMap<String, String>();

    @Test
    public void testCountCovariatesR() {
        HashMap<String, String> expectedMd5s = new HashMap<String, String>();
        expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "7be0b7a624d22187e712131f12aae546");
        //expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "");
        expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "dc1a1b6e99f4a47525cc1dce7b6eb1dc");

        for ( Map.Entry<String, String> pair : expectedMd5s.entrySet() ) {
            final String bam = pair.getKey();
            final String args =
                    "-R /broad/1KG/reference/human_b36_both.fasta" +
                    " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
                    " -T CountCovariatesRefactored" +
                    " -I " + bam +
                    " -L 1:10,000,000-11,000,000" +
                    " --validate_old_recalibrator" +
                    " --use_slx_platform" +
                    " -recalFile %s";
            WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                    args,
                    1, // just one output file
                    Arrays.asList(pair.getValue()));
            List<File> outputs = executeTest("testCountCovariatesR", spec).getFirst();
            // Remember where the recal table landed so the table-recalibration
            // test can reuse it.
            paramsFiles.put(bam, outputs.get(0).getAbsolutePath());
        }
    }

    @Test
    public void testTableRecalibratorR() {
        HashMap<String, String> expectedMd5s = new HashMap<String, String>();
        expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "3ace4b9b8495429cc32e7008145f4996");
        //expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "");
        //expectedMd5s.put("/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "");

        for ( Map.Entry<String, String> pair : expectedMd5s.entrySet() ) {
            final String bam = pair.getKey();
            final String paramsFile = paramsFiles.get(bam);
            System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
            // Skip bams for which the covariate-counting test produced no
            // params file (e.g. when that test did not run first).
            if ( paramsFile == null )
                continue;
            final String args =
                    "-R /broad/1KG/reference/human_b36_both.fasta" +
                    " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
                    " -T TableRecalibrationRefactored" +
                    " -I " + bam +
                    " -L 1:10,000,000-20,000,000" +
                    " --validate_old_recalibrator" +
                    " --use_slx_platform" +
                    " -outputBam %s" +
                    " -recalFile " + paramsFile;
            WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                    args,
                    1, // just one output file
                    Arrays.asList(pair.getValue()));
            executeTest("testTableRecalibratorR", spec);
        }
    }
}

View File

@ -8,7 +8,6 @@ import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalData;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.HashSet;
import java.util.Arrays;

View File

@ -8,7 +8,6 @@ import org.junit.Assert;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalData;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*;