BQSR Parameter cleanup

* get rid of 320C argument that nobody uses. * get rid of DEFAULT_READ_GROUP parameter and functionality (later to become an engine argument).
2012-02-07 13:22:46 -05:00 · 2012-02-07 13:22:46 -05:00 · 0d3ea0401c
parent 717cd4b912
commit 0d3ea0401c
4 changed files with 280 additions and 293 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@ -77,20 +77,20 @@ import java.util.Map;
 * <h2>Output</h2>
 * <p>
 * A recalibration table file in CSV format that is used by the TableRecalibration walker.
- * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.  
+ * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.
 *
- * The first 20 lines of such a file is shown below.  
+ * The first 20 lines of such a file is shown below.
 * * The file begins with a series of comment lines describing:
 * ** The number of counted loci
 * ** The number of counted bases
 * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
- * 
+ *
- * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.  
+ * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
 *
 * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
- * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of 
+ * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of
 * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
- * 
+ *
 * <pre>
 * # Counted Sites    19451059
 * # Counted Bases    56582018
@ -129,13 +129,14 @@ import java.util.Map;
 *   -cov DinucCovariate \
 *   -recalFile my_reads.recal_data.csv
 * </pre>
 *
 */
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
-@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
+@By(DataSource.READS) // Only look at covered loci, not every loci of the reference file
-@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
+@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})
-@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
+// Filter out all reads with zero or unavailable mapping quality
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
 // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
 public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
@ -149,7 +150,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
    /////////////////////////////
    // Shared Arguments
    /////////////////////////////
-    @ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
+    @ArgumentCollection
    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    /////////////////////////////
    // Command Line Arguments
@ -160,7 +162,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     * for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
     * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
     */
-    @Input(fullName="knownSites", shortName = "knownSites", doc="A database of known polymorphic sites to skip over in the recalibration algorithm", required=false)
+    @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
    public List<RodBinding<Feature>> knownSites = Collections.emptyList();
    /**
@ -169,31 +171,31 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
     * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
     */
-    @Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file")
+    @Output(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the output covariates table recalibration file")
    @Gather(CountCovariatesGatherer.class)
    public PrintStream RECAL_FILE;
-    @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
+    @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
    private boolean LIST_ONLY = false;
    /**
     * See the -list argument to view available covariates.
     */
-    @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
+    @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
    private String[] COVARIATES = null;
-    @Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)
+    @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
    private boolean USE_STANDARD_COVARIATES = false;
    /////////////////////////////
    // Debugging-only Arguments
    /////////////////////////////
-    @Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
+    @Argument(fullName = "dont_sort_output", shortName = "unsorted", required = false, doc = "If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
    private boolean DONT_SORT_OUTPUT = false;
    /**
     * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
     */
-    @Argument(fullName="run_without_dbsnp_potentially_ruining_quality", shortName="run_without_dbsnp_potentially_ruining_quality", required=false, doc="If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
+    @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
    private boolean RUN_WITHOUT_DBSNP = false;
    /////////////////////////////
@ -217,6 +219,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        /**
         * Adds the values of other to this, returning this
         *
         * @param other
         * @return this object
         */
@ -247,53 +250,55 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     */
    public void initialize() {
-        if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
+        if (RAC.FORCE_PLATFORM != null) {
-        if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
+            RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
        }
        // Get a list of all available covariates
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>( Covariate.class ).getPlugins();
+        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
-        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>( RequiredCovariate.class ).getPlugins();
+        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
-        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>( StandardCovariate.class ).getPlugins();
+        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
        // Print and exit if that's what was requested
-        if ( LIST_ONLY ) {
+        if (LIST_ONLY) {
-            logger.info( "Available covariates:" );
+            logger.info("Available covariates:");
-            for( Class<?> covClass : covariateClasses ) {
+            for (Class<?> covClass : covariateClasses) {
-                logger.info( covClass.getSimpleName() );
+                logger.info(covClass.getSimpleName());
            }
            logger.info("");
-            System.exit( 0 ); // Early exit here because user requested it
+            System.exit(0); // Early exit here because user requested it
        }
        // Warn the user if no dbSNP file or other variant mask was specified
-        if( knownSites.isEmpty() && !RUN_WITHOUT_DBSNP ) {
+        if (knownSites.isEmpty() && !RUN_WITHOUT_DBSNP) {
            throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
        }
        // Initialize the requested covariates by parsing the -cov argument
        // First add the required covariates
-        if( requiredClasses.size() == 2) { // readGroup and reported quality score
+        if (requiredClasses.size() == 2) { // readGroup and reported quality score
-            requestedCovariates.add( new ReadGroupCovariate() ); // Order is important here
+            requestedCovariates.add(new ReadGroupCovariate()); // Order is important here
-            requestedCovariates.add( new QualityScoreCovariate() );
+            requestedCovariates.add(new QualityScoreCovariate());
-        } else {
+        }
        else {
            throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
        }
        // Next add the standard covariates if -standard was specified by the user
-        if( USE_STANDARD_COVARIATES ) {
+        if (USE_STANDARD_COVARIATES) {
            // We want the standard covariates to appear in a consistent order but the packageUtils method gives a random order
            // A list of Classes can't be sorted, but a list of Class names can be
            final List<String> standardClassNames = new ArrayList<String>();
-            for( Class<?> covClass : standardClasses ) {
+            for (Class<?> covClass : standardClasses) {
-                standardClassNames.add( covClass.getName() );
+                standardClassNames.add(covClass.getName());
            }
            Collections.sort(standardClassNames); // Sort the list of class names
-            for( String className : standardClassNames ) {
+            for (String className : standardClassNames) {
-                for( Class<?> covClass : standardClasses ) { // Find the class that matches this class name
+                for (Class<?> covClass : standardClasses) { // Find the class that matches this class name
-                    if( covClass.getName().equals( className ) ) {
+                    if (covClass.getName().equals(className)) {
                        try {
-                            final Covariate covariate = (Covariate)covClass.newInstance();
+                            final Covariate covariate = (Covariate) covClass.newInstance();
-                            requestedCovariates.add( covariate );
+                            requestedCovariates.add(covariate);
                        } catch (Exception e) {
                            throw new DynamicClassResolutionException(covClass, e);
                        }
@ -302,17 +307,17 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
            }
        }
        // Finally parse the -cov arguments that were provided, skipping over the ones already specified
-        if( COVARIATES != null ) {
+        if (COVARIATES != null) {
-            for( String requestedCovariateString : COVARIATES ) {
+            for (String requestedCovariateString : COVARIATES) {
                boolean foundClass = false;
-                for( Class<?> covClass : covariateClasses ) {
+                for (Class<?> covClass : covariateClasses) {
-                    if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
+                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
                        foundClass = true;
-                        if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) {
+                        if (!requiredClasses.contains(covClass) && (!USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
                            try {
                                // Now that we've found a matching class, try to instantiate it
-                                final Covariate covariate = (Covariate)covClass.newInstance();
+                                final Covariate covariate = (Covariate) covClass.newInstance();
-                                requestedCovariates.add( covariate );
+                                requestedCovariates.add(covariate);
                            } catch (Exception e) {
                                throw new DynamicClassResolutionException(covClass, e);
                            }
@ -320,20 +325,19 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
                    }
                }
-                if( !foundClass ) {
+                if (!foundClass) {
-                    throw new UserException.CommandLineException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." );
+                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
                }
            }
        }
-        logger.info( "The covariates being used here: " );
+        logger.info("The covariates being used here: ");
-        for( Covariate cov : requestedCovariates ) {
+        for (Covariate cov : requestedCovariates) {
-            logger.info( "\t" + cov.getClass().getSimpleName() );
+            logger.info("\t" + cov.getClass().getSimpleName());
-            cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
+            cov.initialize(RAC); // Initialize any covariate member variables using the shared argument collection
        }
    }
    //---------------------------------------------------------------------------------------------------------------
    //
    // map
@ -342,62 +346,63 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
    /**
     * For each read at this locus get the various covariate values and increment that location in the map based on
-     *   whether or not the base matches the reference at this particular location
+     * whether or not the base matches the reference at this particular location
     *
     * @param tracker The reference metadata tracker
-     * @param ref The reference context
+     * @param ref     The reference context
     * @param context The alignment context
     * @return Returns 1, but this value isn't used in the reduce step
     */
-    public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
+    public CountedData map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // Only use data from non-dbsnp sites
        // Assume every mismatch at a non-dbsnp site is indicative of poor quality
        CountedData counter = new CountedData();
-        if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
+        if (tracker.getValues(knownSites).size() == 0) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
            // For each read at this locus
-            for( final PileupElement p : context.getBasePileup() ) {
+            for (final PileupElement p : context.getBasePileup()) {
                final GATKSAMRecord gatkRead = p.getRead();
                int offset = p.getOffset();
-                if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
+                if (gatkRead.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE)) {
                    continue;
                }
-                if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
+                if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) {
-                {
+                    gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
-                    gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true );
+                    RecalDataManager.parseSAMRecord(gatkRead, RAC);
                    RecalDataManager.parseSAMRecord( gatkRead, RAC );
                    // Skip over reads with no calls in the color space if the user requested it
-                    if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
+                    if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace(gatkRead)) {
-                        gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
+                        gatkRead.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
                        continue;
                    }
-                    RecalDataManager.parseColorSpace( gatkRead );
+                    RecalDataManager.parseColorSpace(gatkRead);
-                    gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
+                    gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
                            RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
                }
                // Skip this position if base quality is zero
-                if( gatkRead.getBaseQualities()[offset] > 0 ) {
+                if (gatkRead.getBaseQualities()[offset] > 0) {
                    byte[] bases = gatkRead.getReadBases();
                    byte refBase = ref.getBase();
                    // Skip if this base is an 'N' or etc.
-                    if( BaseUtils.isRegularBase( bases[offset] ) ) {
+                    if (BaseUtils.isRegularBase(bases[offset])) {
                        // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
-                        if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
+                        if (!gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
-                            !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
+                                !RecalDataManager.isInconsistentColorSpace(gatkRead, offset)) {
                            // This base finally passed all the checks for a good base, so add it to the big data hashmap
-                            updateDataFromRead( counter, gatkRead, offset, refBase );
+                            updateDataFromRead(counter, gatkRead, offset, refBase);
-                        } else { // calculate SOLID reference insertion rate
+                        }
-                            if( refBase == bases[offset] ) {
+                        else { // calculate SOLID reference insertion rate
                            if (refBase == bases[offset]) {
                                counter.solidInsertedReferenceBases++;
-                            } else {
+                            }
                            else {
                                counter.otherColorSpaceInconsistency++;
                            }
                        }
@ -405,7 +410,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
                }
            }
            counter.countedSites++;
-        } else { // We skipped over the dbSNP site, and we are only processing every Nth locus
+        }
        else { // We skipped over the dbSNP site, and we are only processing every Nth locus
            counter.skippedSites++;
            updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
        }
@ -413,7 +419,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        return counter;
    }
-   /**
+    /**
     * Update the mismatch / total_base counts for a given class of loci.
     *
     * @param counter The CountedData to be updated
@ -421,13 +427,13 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     * @param refBase The reference base
     */
    private static void updateMismatchCounts(CountedData counter, final AlignmentContext context, final byte refBase) {
-        for( PileupElement p : context.getBasePileup() ) {
+        for (PileupElement p : context.getBasePileup()) {
            final byte readBase = p.getBase();
            final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
-            final int refBaseIndex  = BaseUtils.simpleBaseToBaseIndex(refBase);
+            final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
-            if( readBaseIndex != -1 && refBaseIndex != -1 ) {
+            if (readBaseIndex != -1 && refBaseIndex != -1) {
-                if( readBaseIndex != refBaseIndex ) {
+                if (readBaseIndex != refBaseIndex) {
                    counter.novelCountsMM++;
                }
                counter.novelCountsBases++;
@ -439,13 +445,14 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     * Major workhorse routine for this walker.
     * Loop through the list of requested covariates and pick out the value from the read, offset, and reference
     * Using the list of covariate values as a key, pick out the RecalDatum and increment,
-     *   adding one to the number of observations and potentially one to the number of mismatches
+     * adding one to the number of observations and potentially one to the number of mismatches
     * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
-     *   because pulling things out of the SAMRecord is an expensive operation.
+     * because pulling things out of the SAMRecord is an expensive operation.
-     * @param counter Data structure which holds the counted bases
+     *
     * @param counter  Data structure which holds the counted bases
     * @param gatkRead The SAMRecord holding all the data for this read
-     * @param offset The offset in the read for this locus
+     * @param offset   The offset in the read for this locus
-     * @param refBase The reference base at this locus
+     * @param refBase  The reference base at this locus
     */
    private void updateDataFromRead(CountedData counter, final GATKSAMRecord gatkRead, final int offset, final byte refBase) {
        final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
@ -453,10 +460,10 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
        final NestedHashMap data = dataManager.data; //optimization - create local reference
-        RecalDatumOptimized datum = (RecalDatumOptimized) data.get( key );
+        RecalDatumOptimized datum = (RecalDatumOptimized) data.get(key);
-        if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
+        if (datum == null) { // key doesn't exist yet in the map so make a new bucket and add it
            // initialized with zeros, will be incremented at end of method
-            datum = (RecalDatumOptimized)data.put( new RecalDatumOptimized(), true, (Object[])key );
+            datum = (RecalDatumOptimized) data.put(new RecalDatumOptimized(), true, (Object[]) key);
        }
        // Need the bases to determine whether or not we have a mismatch
@ -464,13 +471,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        final long curMismatches = datum.getNumMismatches();
        // Add one to the number of observations and potentially one to the number of mismatches
-        datum.incrementBaseCounts( base, refBase );
+        datum.incrementBaseCounts(base, refBase);
        counter.countedBases++;
        counter.novelCountsBases++;
        counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
    }
    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
@ -479,6 +485,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
    /**
     * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
     *
     * @return returns A PrintStream created from the -recalFile filename argument specified to the walker
     */
    public CountedData reduceInit() {
@ -487,11 +494,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
    /**
     * The Reduce method doesn't do anything for this walker.
     *
     * @param mapped Result of the map. This value is immediately ignored.
-     * @param sum The summing CountedData used to output the CSV data
+     * @param sum    The summing CountedData used to output the CSV data
     * @return returns The sum used to output the CSV data
     */
-    public CountedData reduce( CountedData mapped, CountedData sum ) {
+    public CountedData reduce(CountedData mapped, CountedData sum) {
        // Do a dbSNP sanity check every so often
        return validatingDbsnpMismatchRate(sum.add(mapped));
    }
@ -500,16 +508,15 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     * Validate the dbSNP reference mismatch rates.
     */
    private CountedData validatingDbsnpMismatchRate(CountedData counter) {
-        if( ++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY ) {
+        if (++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY) {
            counter.lociSinceLastDbsnpCheck = 0;
-            if( counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L ) {
+            if (counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L) {
-                final double fractionMM_novel = (double)counter.novelCountsMM / (double)counter.novelCountsBases;
+                final double fractionMM_novel = (double) counter.novelCountsMM / (double) counter.novelCountsBases;
-                final double fractionMM_dbsnp = (double)counter.dbSNPCountsMM / (double)counter.dbSNPCountsBases;
+                final double fractionMM_dbsnp = (double) counter.dbSNPCountsMM / (double) counter.dbSNPCountsBases;
-                if( fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel ) {
+                if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) {
-                    Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
+                    Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel));
                            String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel) );
                    DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file
                }
            }
@ -518,47 +525,50 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        return counter;
    }
-    public CountedData treeReduce( CountedData sum1, CountedData sum2 ) {
+    public CountedData treeReduce(CountedData sum1, CountedData sum2) {
        return validatingDbsnpMismatchRate(sum1.add(sum2));
    }
    /**
     * Write out the full data hashmap to disk in CSV format
     *
     * @param sum The CountedData to write out to RECAL_FILE
     */
-    public void onTraversalDone( CountedData sum ) {
+    public void onTraversalDone(CountedData sum) {
-        logger.info( "Writing raw recalibration data..." );
+        logger.info("Writing raw recalibration data...");
-        if( sum.countedBases == 0L ) {
+        if (sum.countedBases == 0L) {
            throw new UserException.BadInput("Could not find any usable data in the input BAM file(s).");
        }
-        outputToCSV( sum, RECAL_FILE );
+        outputToCSV(sum, RECAL_FILE);
-        logger.info( "...done!" );
+        logger.info("...done!");
    }
    /**
     * For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
     *
     * @param recalTableStream The PrintStream to write out to
     */
-    private void outputToCSV( CountedData sum, final PrintStream recalTableStream ) {
+    private void outputToCSV(CountedData sum, final PrintStream recalTableStream) {
        recalTableStream.printf("# Counted Sites    %d%n", sum.countedSites);
        recalTableStream.printf("# Counted Bases    %d%n", sum.countedBases);
        recalTableStream.printf("# Skipped Sites    %d%n", sum.skippedSites);
-        recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)sum.countedSites / sum.skippedSites);
+        recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double) sum.countedSites / sum.skippedSites);
-        if( sum.solidInsertedReferenceBases != 0 ) {
+        if (sum.solidInsertedReferenceBases != 0) {
            recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double) sum.countedBases / sum.solidInsertedReferenceBases);
            recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double) sum.countedBases / sum.otherColorSpaceInconsistency);
        }
        // Output header saying which covariates were used and in what order
-        for( Covariate cov : requestedCovariates ) {
+        for (Covariate cov : requestedCovariates) {
-            recalTableStream.print( cov.getClass().getSimpleName().split("Covariate")[0] + "," );
+            recalTableStream.print(cov.getClass().getSimpleName().split("Covariate")[0] + ",");
        }
        recalTableStream.println("nObservations,nMismatches,Qempirical");
-        if( DONT_SORT_OUTPUT ) {
+        if (DONT_SORT_OUTPUT) {
            printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
-        } else {
+        }
        else {
            printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
        }
@ -566,45 +576,47 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
        recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
    }
-    private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
+    private void printMappingsSorted(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
        final ArrayList<Comparable> keyList = new ArrayList<Comparable>();
-        for( Object comp : data.keySet() ) {
+        for (Object comp : data.keySet()) {
            keyList.add((Comparable) comp);
        }
        Collections.sort(keyList);
-        for( Comparable comp : keyList ) {
+        for (Comparable comp : keyList) {
            key[curPos] = comp;
            final Object val = data.get(comp);
-            if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
+            if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
                // For each Covariate in the key
-                for( Object compToPrint : key ) {
+                for (Object compToPrint : key) {
                    // Output the Covariate's value
-                    recalTableStream.print( compToPrint + "," );
+                    recalTableStream.print(compToPrint + ",");
                }
                // Output the RecalDatum entry
-                recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
+                recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
-            } else { // Another layer in the nested hash map
+            }
-                printMappingsSorted( recalTableStream, curPos + 1, key, (Map) val );
+            else { // Another layer in the nested hash map
                printMappingsSorted(recalTableStream, curPos + 1, key, (Map) val);
            }
        }
    }
-    private void printMappings( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
+    private void printMappings(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
-        for( Object comp : data.keySet() ) {
+        for (Object comp : data.keySet()) {
            key[curPos] = comp;
            final Object val = data.get(comp);
-            if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
+            if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
                // For each Covariate in the key
-                for( Object compToPrint : key ) {
+                for (Object compToPrint : key) {
                    // Output the Covariate's value
-                    recalTableStream.print( compToPrint + "," );
+                    recalTableStream.print(compToPrint + ",");
                }
                // Output the RecalDatum entry
-                recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
+                recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
-            } else { // Another layer in the nested hash map
+            }
-                printMappings( recalTableStream, curPos + 1, key, (Map) val );
+            else { // Another layer in the nested hash map
                printMappings(recalTableStream, curPos + 1, key, (Map) val);
            }
        }
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@ -256,32 +256,6 @@ public class RecalDataManager {
    public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
        GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
        // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
        if (readGroup == null) {
            if (RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
                if (!warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null) {
                    Utils.warnUser("The input .bam file contains reads with no read group. " +
                            "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
                            "First observed at read with name = " + read.getReadName());
                    warnUserNullReadGroup = true;
                }
                // There is no readGroup so defaulting to these values
                readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
                readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
                ((GATKSAMRecord) read).setReadGroup(readGroup);
            }
            else {
                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
            }
        }
        if (RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP)) { // Collapse all the read groups into a single common String provided by the user
            final String oldPlatform = readGroup.getPlatform();
            readGroup = new GATKSAMReadGroupRecord(RAC.FORCE_READ_GROUP);
            readGroup.setPlatform(oldPlatform);
            ((GATKSAMRecord) read).setReadGroup(readGroup);
        }
        if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
            readGroup.setPlatform(RAC.FORCE_PLATFORM);
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
@ -43,31 +43,15 @@ public class RecalibrationArgumentCollection {
    // Shared Command Line Arguments
    //////////////////////////////////
    @Hidden
    @Argument(fullName = "default_read_group", shortName = "dRG", required = false, doc = "If a read has no read group then default to the provided String.")
    public String DEFAULT_READ_GROUP = null;
    @Hidden
    @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
    public String DEFAULT_PLATFORM = null;
    @Hidden
    @Argument(fullName = "force_read_group", shortName = "fRG", required = false, doc = "If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
    public String FORCE_READ_GROUP = null;
    @Hidden
    @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
    public String FORCE_PLATFORM = null;
    @Hidden
    @Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
    public int WINDOW_SIZE = 5;
    /**
     * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
     */
    @Hidden
    @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
    public int HOMOPOLYMER_NBACK = 7;
    @Hidden
    @Argument(fullName = "exception_if_no_tile", shortName = "throwTileException", doc = "If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required = false)
    public boolean EXCEPTION_IF_NO_TILE = false;
    /**
     * CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
     * reads which have had the reference inserted because of color space inconsistencies.
@ -89,4 +73,10 @@ public class RecalibrationArgumentCollection {
    @Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
    public int CONTEXT_SIZE = 8;
    /**
     * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
     */
    @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
    public int HOMOPOLYMER_NBACK = 7;
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@ -86,12 +86,12 @@ import java.util.regex.Pattern;
 *   -o my_reads.recal.bam \
 *   -recalFile my_reads.recal_data.csv
 * </pre>
 *
 */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@WalkerName("TableRecalibration")
-@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
+@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
 // This walker requires -I input.bam, it also requires -R reference.fasta
 public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
    public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
@ -99,7 +99,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
    /////////////////////////////
    // Shared Arguments
    /////////////////////////////
-    @ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
+    @ArgumentCollection
    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    /////////////////////////////
    // Command Line Arguments
@ -110,12 +111,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
     * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
     */
-    @Input(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the input covariates table recalibration .csv file")
+    @Input(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the input covariates table recalibration .csv file")
    public File RECAL_FILE = null;
    /**
     * A new bam file in which the quality scores in each read have been recalibrated. The alignment of the reads is left untouched.
     */
-    @Output(doc="The output recalibrated BAM file", required=true)
+    @Output(doc = "The output recalibrated BAM file", required = true)
    private StingSAMFileWriter OUTPUT_BAM = null;
    /**
@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     * your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases
     * are unmodified during recalibration, so they don't get inappropriately evaluated.
     */
-    @Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
+    @Argument(fullName = "preserve_qscores_less_than", shortName = "pQ", doc = "Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required = false)
    private int PRESERVE_QSCORES_LESS_THAN = 5;
    /**
@ -135,37 +136,36 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     * argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example,
     * --smoothing 15 for a large amount of smoothing.
     */
-    @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
+    @Argument(fullName = "smoothing", shortName = "sm", required = false, doc = "Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
    private int SMOOTHING = 1;
    /**
     * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
     * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later.
     */
-    @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores")
+    @Argument(fullName = "max_quality_score", shortName = "maxQ", required = false, doc = "The integer value at which to cap the quality scores")
    private int MAX_QUALITY_SCORE = 50;
    /**
     * By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun
     * the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag.
     */
-    @Argument(fullName="doNotWriteOriginalQuals", shortName="noOQs", required=false, doc="If true, we will not write the original quality (OQ) tag for each read")
+    @Argument(fullName = "doNotWriteOriginalQuals", shortName = "noOQs", required = false, doc = "If true, we will not write the original quality (OQ) tag for each read")
    private boolean DO_NOT_WRITE_OQ = false;
    /////////////////////////////
    // Debugging-only Arguments
    /////////////////////////////
    @Hidden
-    @Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
+    @Argument(fullName = "no_pg_tag", shortName = "noPG", required = false, doc = "Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
    private boolean NO_PG_TAG = false;
    @Hidden
-    @Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
+    @Argument(fullName = "fail_with_no_eof_marker", shortName = "requireEOF", required = false, doc = "If no EOF marker is present in the covariates file, exit the program with an exception.")
    private boolean REQUIRE_EOF = false;
    @Hidden
-    @Argument(fullName="skipUQUpdate", shortName="skipUQUpdate", required=false, doc="If true, we will skip the UQ updating step for each read, speeding up the calculations")
+    @Argument(fullName = "skipUQUpdate", shortName = "skipUQUpdate", required = false, doc = "If true, we will skip the UQ updating step for each read, speeding up the calculations")
    private boolean skipUQUpdate = false;
    /////////////////////////////
    // Private Member Variables
    /////////////////////////////
@ -195,8 +195,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     */
    public void initialize() {
-        if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
+        if (RAC.FORCE_PLATFORM != null) {
-        if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
+            RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
        }
        // Get a list of all available covariates
        final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
@ -205,31 +206,33 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
        boolean foundAllCovariates = false;
        // Read in the data from the csv file and populate the data map and covariates list
-        logger.info( "Reading in the data from input csv file..." );
+        logger.info("Reading in the data from input csv file...");
        boolean sawEOF = false;
        try {
-            for ( String line : new XReadLines(RECAL_FILE) ) {
+            for (String line : new XReadLines(RECAL_FILE)) {
                lineNumber++;
-                if ( EOF_MARKER.equals(line) ) {
+                if (EOF_MARKER.equals(line)) {
                    sawEOF = true;
-                } else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() )  {
+                }
                else if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
                    ; // Skip over the comment lines, (which start with '#')
                }
                // Read in the covariates that were used from the input file
-                else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
+                else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
-                    if( foundAllCovariates ) {
+                    if (foundAllCovariates) {
-                        throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
+                        throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
-                    } else { // Found the covariate list in input file, loop through all of them and instantiate them
+                    }
                    else { // Found the covariate list in input file, loop through all of them and instantiate them
                        String[] vals = line.split(",");
-                        for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
+                        for (int iii = 0; iii < vals.length - 3; iii++) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
                            boolean foundClass = false;
-                            for( Class<?> covClass : classes ) {
+                            for (Class<?> covClass : classes) {
-                                if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
+                                if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
                                    foundClass = true;
                                    try {
-                                        Covariate covariate = (Covariate)covClass.newInstance();
+                                        Covariate covariate = (Covariate) covClass.newInstance();
-                                        requestedCovariates.add( covariate );
+                                        requestedCovariates.add(covariate);
                                    } catch (Exception e) {
                                        throw new DynamicClassResolutionException(covClass, e);
                                    }
@ -237,107 +240,110 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
                                }
                            }
-                            if( !foundClass ) {
+                            if (!foundClass) {
-                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
+                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
                            }
                        }
                    }
-                } else { // Found a line of data
+                }
-                    if( !foundAllCovariates ) {
+                else { // Found a line of data
                    if (!foundAllCovariates) {
                        foundAllCovariates = true;
                        // At this point all the covariates should have been found and initialized
-                        if( requestedCovariates.size() < 2 ) {
+                        if (requestedCovariates.size() < 2) {
-                            throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
+                            throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
                        }
                        final boolean createCollapsedTables = true;
                        // Initialize any covariate member variables using the shared argument collection
-                        for( Covariate cov : requestedCovariates ) {
+                        for (Covariate cov : requestedCovariates) {
-                            cov.initialize( RAC );
+                            cov.initialize(RAC);
                        }
                        // Initialize the data hashMaps
-                        dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
+                        dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
                    }
                    addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
                }
            }
-        } catch ( FileNotFoundException e ) {
+        } catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
-        } catch ( NumberFormatException e ) {
+        } catch (NumberFormatException e) {
            throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
        }
-        logger.info( "...done!" );
+        logger.info("...done!");
-        if ( !sawEOF ) {
+        if (!sawEOF) {
            final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
-            if ( REQUIRE_EOF )
+            if (REQUIRE_EOF)
                throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
            logger.warn(errorMessage);
        }
-        logger.info( "The covariates being used here: " );
+        logger.info("The covariates being used here: ");
-        for( Covariate cov : requestedCovariates ) {
+        for (Covariate cov : requestedCovariates) {
-            logger.info( "\t" + cov.getClass().getSimpleName() );
+            logger.info("\t" + cov.getClass().getSimpleName());
        }
-        if( dataManager == null ) {
+        if (dataManager == null) {
            throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
        }
        // Create the tables of empirical quality scores that will be used in the sequential calculation
-        logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
+        logger.info("Generating tables of empirical qualities for use in sequential calculation...");
-        dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
+        dataManager.generateEmpiricalQualities(SMOOTHING, MAX_QUALITY_SCORE);
-        logger.info( "...done!" );
+        logger.info("...done!");
        // Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
        final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
-        if( !NO_PG_TAG ) {
+        if (!NO_PG_TAG) {
            final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
            final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
            try {
                final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
                programRecord.setProgramVersion(version);
-            } catch (MissingResourceException e) {}
+            } catch (MissingResourceException e) {
            }
            StringBuffer sb = new StringBuffer();
            sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
            sb.append(" Covariates=[");
-            for( Covariate cov : requestedCovariates ) {
+            for (Covariate cov : requestedCovariates) {
                sb.append(cov.getClass().getSimpleName());
                sb.append(", ");
            }
-            sb.setCharAt(sb.length()-2, ']');
+            sb.setCharAt(sb.length() - 2, ']');
-            sb.setCharAt(sb.length()-1, ' ');
+            sb.setCharAt(sb.length() - 1, ' ');
            programRecord.setCommandLine(sb.toString());
            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
+            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size() + 1);
-            for ( SAMProgramRecord record : oldRecords ) {
+            for (SAMProgramRecord record : oldRecords) {
-                if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) )
+                if (!record.getId().startsWith(PROGRAM_RECORD_NAME))
                    newRecords.add(record);
            }
            newRecords.add(programRecord);
            header.setProgramRecords(newRecords);
            // Write out the new header
-            OUTPUT_BAM.writeHeader( header );
+            OUTPUT_BAM.writeHeader(header);
        }
    }
    /**
     * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
     *
     * @param line A line of CSV data read from the recalibration table data file
     */
    private void addCSVData(final File file, final String line) {
        final String[] vals = line.split(",");
        // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
-        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
+        if (vals.length != requestedCovariates.size() + 3) { // +3 because of nObservations, nMismatch, and Qempirical
            throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
                    " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
        }
@ -345,15 +351,15 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
        final Object[] key = new Object[requestedCovariates.size()];
        Covariate cov;
        int iii;
-        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
+        for (iii = 0; iii < requestedCovariates.size(); iii++) {
-            cov = requestedCovariates.get( iii );
+            cov = requestedCovariates.get(iii);
-            key[iii] = cov.getValue( vals[iii] );
+            key[iii] = cov.getValue(vals[iii]);
        }
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
-        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
+        final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
        // Add that datum to all the collapsed tables which will be used in the sequential calculation
-        dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
+        dataManager.addToAllTables(key, datum, PRESERVE_QSCORES_LESS_THAN);
    }
    //---------------------------------------------------------------------------------------------------------------
@ -366,64 +372,63 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     * For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
     *
     * @param refBases References bases over the length of the read
-     * @param read The read to be recalibrated
+     * @param read     The read to be recalibrated
     * @return The read with quality scores replaced
     */
-    public SAMRecord map( ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker  ) {
+    public SAMRecord map(ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
-        if( read.getReadLength() == 0 ) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
+        if (read.getReadLength() == 0) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
            return read;
        }
-        RecalDataManager.parseSAMRecord( read, RAC );
+        RecalDataManager.parseSAMRecord(read, RAC);
        byte[] originalQuals = read.getBaseQualities();
        final byte[] recalQuals = originalQuals.clone();
        final String platform = read.getReadGroup().getPlatform();
-        if( platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) ) {
+        if (platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING)) {
-            if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) ) {
+            if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)) {
-                final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
+                final boolean badColor = RecalDataManager.checkNoCallColorSpace(read);
-                if( badColor ) {
+                if (badColor) {
                    numReadsWithMalformedColorSpace++;
-                    if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
+                    if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
                        return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
-                    } else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
+                    }
                    else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
                        read.setReadFailsVendorQualityCheckFlag(true);
                        return read;
                    }
                }
            }
-            originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases() );
+            originalQuals = RecalDataManager.calcColorSpace(read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases());
        }
        //compute all covariate values for this read
-        final Comparable[][] covariateValues_offset_x_covar =
+        final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
            RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
        // For each base in the read
-        for( int offset = 0; offset < read.getReadLength(); offset++ ) {
+        for (int offset = 0; offset < read.getReadLength(); offset++) {
            final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
            Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
-            if(qualityScore == null)
+            if (qualityScore == null) {
-            {
+                qualityScore = performSequentialQualityCalculation(fullCovariateKey);
                qualityScore = performSequentialQualityCalculation( fullCovariateKey );
                qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
            }
            recalQuals[offset] = qualityScore;
        }
-        preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
+        preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
-        read.setBaseQualities( recalQuals ); // Overwrite old qualities with new recalibrated qualities
+        read.setBaseQualities(recalQuals); // Overwrite old qualities with new recalibrated qualities
-        if ( !DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // Save the old qualities if the tag isn't already taken in the read
+        if (!DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null) { // Save the old qualities if the tag isn't already taken in the read
            read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, SAMUtils.phredToFastq(originalQuals));
        }
-        if (! skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
+        if (!skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
            read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
        }
@ -440,27 +445,28 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     *
     * Given the full recalibration table, we perform the following preprocessing steps:
     *
-     *   - calculate the global quality score shift across all data [DeltaQ]
+     * - calculate the global quality score shift across all data [DeltaQ]
-     *   - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
+     * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
-     *      -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
+     * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
-     *   - The final shift equation is:
+     * - The final shift equation is:
     *
     * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
     *
     *      Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
     * @param key The list of Comparables that were calculated from the covariates
     * @return A recalibrated quality score as a byte
     */
-    private byte performSequentialQualityCalculation( final Object... key ) {
+    private byte performSequentialQualityCalculation(final Object... key) {
-        final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
+        final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
        final Object[] readGroupCollapsedKey = new Object[1];
        final Object[] qualityScoreCollapsedKey = new Object[2];
        final Object[] covariateCollapsedKey = new Object[3];
        // The global quality shift (over the read group only)
        readGroupCollapsedKey[0] = key[0];
-        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
+        final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey));
        double globalDeltaQ = 0.0;
-        if( globalRecalDatum != null ) {
+        if (globalRecalDatum != null) {
            final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
            final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
            globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
@ -469,9 +475,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
        // The shift in quality between reported and empirical
        qualityScoreCollapsedKey[0] = key[0];
        qualityScoreCollapsedKey[1] = key[1];
-        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
+        final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey));
        double deltaQReported = 0.0;
-        if( qReportedRecalDatum != null ) {
+        if (qReportedRecalDatum != null) {
            final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
            deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
        }
@ -481,17 +487,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
        double deltaQCovariateEmpirical;
        covariateCollapsedKey[0] = key[0];
        covariateCollapsedKey[1] = key[1];
-        for( int iii = 2; iii < key.length; iii++ ) {
+        for (int iii = 2; iii < key.length; iii++) {
-            covariateCollapsedKey[2] =  key[iii]; // The given covariate
+            covariateCollapsedKey[2] = key[iii]; // The given covariate
-            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
+            final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey));
-            if( covariateRecalDatum != null ) {
+            if (covariateRecalDatum != null) {
                deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
-                deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
+                deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
            }
        }
        final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
-        return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
+        return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
        // Verbose printouts used to validate with old recalibrator
        //if(key.contains(null)) {
@ -508,12 +514,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
    /**
     * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
     *
     * @param originalQuals The list of original base quality scores
-     * @param recalQuals A list of the new recalibrated quality scores
+     * @param recalQuals    A list of the new recalibrated quality scores
     */
-    private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
+    private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
-        for( int iii = 0; iii < recalQuals.length; iii++ ) {
+        for (int iii = 0; iii < recalQuals.length; iii++) {
-            if( originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN ) {
+            if (originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN) {
                recalQuals[iii] = originalQuals[iii];
            }
        }
@ -527,6 +534,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
    /**
     * Start the reduce with a handle to the output bam file
     *
     * @return A FileWriter pointing to a new bam file
     */
    public SAMFileWriter reduceInit() {
@ -535,12 +543,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
    /**
     * Output each read to disk
-     * @param read The read to output
+     *
     * @param read   The read to output
     * @param output The FileWriter to write the read to
     * @return The FileWriter
     */
-    public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
+    public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
-        if( output != null ) {
+        if (output != null) {
            output.addAlignment(read);
        }
        return output;
@ -548,20 +557,22 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
    /**
     * Do nothing
     *
     * @param output The SAMFileWriter that outputs the bam file
     */
    public void onTraversalDone(SAMFileWriter output) {
-        if( numReadsWithMalformedColorSpace != 0 ) {
+        if (numReadsWithMalformedColorSpace != 0) {
-            if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
+            if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
                Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
-                    "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
+                        "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
-                    "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
+                        "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
-                    "These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
+                        "These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
-            } else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
+            }
            else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
                Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
-                    "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
+                        "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
-                    "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
+                        "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
-                    "These reads were completely removed from the output bam file.");
+                        "These reads were completely removed from the output bam file.");
            }
        }