From 601e53d633567ee7411afb9cda84235577ec3e95 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Thu, 2 Feb 2012 16:34:26 -0500
Subject: [PATCH 01/67] Fix when specifying preset active regions with -AR
 argument

---
 .../sting/gatk/traversals/TraverseActiveRegions.java   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 769bec720..ce8cb557b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -124,7 +124,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
 
             // Take the individual isActive calls and integrate them into contiguous active regions and
             // add these blocks of work to the work queue
-            final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension );
+            final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension, walker.presetActiveRegions != null );
             logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
             if( walker.activeRegionOutStream == null ) { 
                 workQueue.addAll( activeRegions ); 
@@ -214,7 +214,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
     }
 
     // band-pass filter the list of isActive probabilities and turn into active regions
-    private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<Double> activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension ) {
+    private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<Double> activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension, final boolean presetRegions ) {
 
         final double ACTIVE_PROB_THRESHOLD = 0.2; // BUGBUG: needs to be set-able by the walker author
         final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
@@ -227,11 +227,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
         } else {
             final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]);
             final double[] filteredProbArray = new double[activeProbArray.length];
-            final int FILTER_SIZE = 50; // BUGBUG: needs to be set-able by the walker author
-            final int MAX_ACTIVE_REGION = 425; // BUGBUG: needs to be set-able by the walker author
+            final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // BUGBUG: needs to be set-able by the walker author
+            final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // BUGBUG: needs to be set-able by the walker author
             for( int iii = 0; iii < activeProbArray.length; iii++ ) {
                 double maxVal = 0;
-                for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
+                for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE+1); jjj++ ) {
                     if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
                 }
                 filteredProbArray[iii] = maxVal;

From e1d69e4060b4791f126eaf534f12152c5e067b98 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 1 Feb 2012 19:34:39 -0500
Subject: [PATCH 03/67] make the size of a GenomeLoc int instead of long

it will never be bigger than an int and it's actually useful to be an int so we can use it as parameters to array/list/hash size creation.
---
 .../src/org/broadinstitute/sting/utils/GenomeLoc.java    | 2 +-
 .../sting/utils/interval/IntervalUtils.java              | 4 ++--
 .../sting/utils/interval/IntervalUtilsUnitTest.java      | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index ad10b61e7..41ca58157 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -436,7 +436,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
      *         never be < 1.
      */
     @Ensures("result > 0")
-    public long size() {
+    public int size() {
         return stop - start + 1;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
index f8655f74a..ea1eaeb51 100644
--- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
@@ -314,10 +314,10 @@ public class IntervalUtils {
      * @param reference The reference for the intervals.
      * @return A map of contig names with their sizes.
      */
-    public static Map<String, Long> getContigSizes(File reference) {
+    public static Map<String, Integer> getContigSizes(File reference) {
         ReferenceDataSource referenceSource = new ReferenceDataSource(reference);
         List<GenomeLoc> locs = GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSource.getReference().getSequenceDictionary()).toList();
-        Map<String, Long> lengths = new LinkedHashMap<String, Long>();
+        Map<String, Integer> lengths = new LinkedHashMap<String, Integer>();
         for (GenomeLoc loc: locs)
             lengths.put(loc.getContig(), loc.size());
         return lengths;
diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
index a9035ffd9..0a8caa8cc 100644
--- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
@@ -8,13 +8,12 @@ import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.commandline.IntervalBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
-import org.testng.Assert;
-import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
-
+import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -341,7 +340,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
 
     @Test
     public void testGetContigLengths() {
-        Map<String, Long> lengths = IntervalUtils.getContigSizes(new File(BaseTest.hg18Reference));
+        Map<String, Integer> lengths = IntervalUtils.getContigSizes(new File(BaseTest.hg18Reference));
         Assert.assertEquals((long)lengths.get("chr1"), 247249719);
         Assert.assertEquals((long)lengths.get("chr2"), 242951149);
         Assert.assertEquals((long)lengths.get("chr3"), 199501827);

From 3dd6a1f96272a6794108d88ac14ab8f892996ae4 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 1 Feb 2012 19:35:09 -0500
Subject: [PATCH 04/67] Adding some generic sum and average functions to
 MathUtils

---
 .../broadinstitute/sting/utils/MathUtils.java | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index 2f2dbd47e..814cb2765 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -137,6 +137,10 @@ public class MathUtils {
 
         return size;
     }
+    
+    public static double average(Collection<Integer> x) {
+        return (double) sum(x) / x.size();
+    }
 
     public static double average(Collection<Number> numbers, boolean ignoreNan) {
         if (ignoreNan) {
@@ -176,6 +180,13 @@ public class MathUtils {
         return s;
     }
 
+    public static long sum(int[] x) {
+        long total = 0;
+        for (int v : x)
+            total += v;
+        return total;
+    }
+
 
     /**
      * Calculates the log10 cumulative sum of an array with log10 probabilities
@@ -722,6 +733,13 @@ public class MathUtils {
         return average(vals, vals.size());
     }
 
+    public static double average(int[] x) {
+        int sum = 0;
+        for (int v : x)
+            sum += v;
+        return (double) sum / x.length;
+    }
+
     public static byte average(byte[] vals) {
         int sum = 0;
         for (byte v : vals) {
@@ -1079,6 +1097,13 @@ public class MathUtils {
         return getQScoreOrderStatistic(reads, offsets, (int) Math.floor(reads.size() / 2.));
     }
 
+    public static long sum(Collection<Integer> x) {
+        long sum = 0;
+        for (int v : x)
+            sum += v;        
+        return sum;
+    }
+
     /**
      * A utility class that computes on the fly average and standard deviation for a stream of numbers.
      * The number of observations does not have to be known in advance, and can be also very big (so that

From 4a57add6d0e7591bdbcd059d0195061d3a6a8152 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 1 Feb 2012 19:35:33 -0500
Subject: [PATCH 05/67] First implementation of DiagnoseTargets

   * calculates and interprets the coverage of a given interval track
   * allows to expand intervals by specified number of bases
   * classifies targets as CALLABLE, LOW_COVERAGE, EXCESSIVE_COVERAGE and POOR_QUALITY.
   * outputs text file for now (testing purposes only), soon to be VCF.
   * filters are overly aggressive for now.
---
 .../diagnostics/targets/CallableStatus.java   |  22 ++
 .../diagnostics/targets/DiagnoseTargets.java  | 172 ++++++++++++
 .../targets/IntervalStatisticLocus.java       |  34 +++
 .../targets/IntervalStatistics.java           | 122 +++++++++
 .../broadinstitute/sting/utils/MathUtils.java | 253 ++++++++----------
 5 files changed, 457 insertions(+), 146 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java
new file mode 100644
index 000000000..60f20074a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java
@@ -0,0 +1,22 @@
+package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
+
+/**
+ * Short one line description of the walker.
+ *
+ * @author Mauricio Carneiro
+ * @since 2/1/12
+ */
+public enum CallableStatus {
+    /** the reference base was an N, which is not considered callable the GATK */
+    REF_N,
+    /** the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE */
+    CALLABLE,
+    /** absolutely no reads were seen at this locus, regardless of the filtering parameters */
+    NO_COVERAGE,
+    /** there were less than min. depth bases at the locus, after applying filters */
+    LOW_COVERAGE,
+    /** more than -maxDepth read at the locus, indicating some sort of mapping problem */
+    EXCESSIVE_COVERAGE,
+    /** more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads */
+    POOR_QUALITY
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
new file mode 100644
index 000000000..979fb665f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java
@@ -0,0 +1,172 @@
+package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
+
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.IntervalBinding;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.By;
+import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocComparator;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+
+/**
+ * Short one line description of the walker.
+ *
+ * <p>
+ * [Long description of the walker]
+ * </p>
+ *
+ *
+ * <h2>Input</h2>
+ * <p>
+ * [Description of the Input]
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * [Description of the Output]
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T [walker name]
+ *  </pre>
+ *
+ * @author Mauricio Carneiro
+ * @since 2/1/12
+ */
+@By(value = DataSource.READS)
+public class DiagnoseTargets extends LocusWalker<Long, Long> {
+    @Input(fullName = "interval_track", shortName = "int", doc = "", required = true)
+    private IntervalBinding<Feature> intervalTrack = null;
+
+    @Output
+    private PrintStream out = System.out;
+
+    @Argument(fullName = "expand_interval", shortName = "exp", doc = "", required = false)
+    private int expandInterval = 50;
+
+    @Argument(fullName = "minimum_base_quality", shortName = "mbq", doc = "", required = false)
+    private int minimumBaseQuality = 20;
+
+    @Argument(fullName = "minimum_mapping_quality", shortName = "mmq", doc = "", required = false)
+    private int minimumMappingQuality = 20;
+
+    @Argument(fullName = "minimum_coverage", shortName = "mincov", doc = "", required = false)
+    private int minimumCoverage = 5;
+
+    @Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false)
+    private int maximumCoverage = 700;
+
+    private TreeSet<GenomeLoc> intervalList = null;                     // The list of intervals of interest (plus expanded intervals if user wants them)
+    private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null;  // interval => statistics
+    private Iterator<GenomeLoc> intervalListIterator;                   // An iterator to go over all the intervals provided as we traverse the genome
+    private GenomeLoc currentInterval = null;                           // The "current" interval loaded and being filled with statistics
+    private IntervalStatistics currentIntervalStatistics = null;                 // The "current" interval loaded and being filled with statistics
+
+    private GenomeLocParser parser;                                     // just an object to allow us to create genome locs (for the expanded intervals)
+
+    @Override
+    public void initialize() {
+        super.initialize();
+
+        if (intervalTrack == null)
+            throw new UserException("This tool currently only works if you provide an interval track");
+
+        parser = new GenomeLocParser(getToolkit().getMasterSequenceDictionary());       // Important to initialize the parser before creating the intervals below
+
+        List<GenomeLoc> originalList = intervalTrack.getIntervals(getToolkit());        // The original list of targets provided by the user that will be expanded or not depending on the options provided
+        intervalList = new TreeSet<GenomeLoc>(new GenomeLocComparator());
+        intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(originalList.size() * 2);
+        for (GenomeLoc interval : originalList)
+            addAndExpandIntervalToLists(interval);
+
+        intervalListIterator = intervalList.iterator();
+    }
+
+    @Override
+    public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        GenomeLoc refLocus = ref.getLocus();
+        while (currentInterval == null || currentInterval.isBefore(refLocus)) {
+            if (!intervalListIterator.hasNext())
+                return 0L;
+
+            currentInterval = intervalListIterator.next();
+            currentIntervalStatistics = intervalMap.get(currentInterval);
+        }
+
+        if (currentInterval.isPast(refLocus))
+            return 0L;
+
+        byte[] mappingQualities = context.getBasePileup().getMappingQuals();
+        byte[] baseQualities = context.getBasePileup().getQuals();
+        int coverage = context.getBasePileup().getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
+        int rawCoverage = context.size();
+
+        IntervalStatisticLocus locusData = new IntervalStatisticLocus(mappingQualities, baseQualities, coverage, rawCoverage);
+        currentIntervalStatistics.addLocus(refLocus, locusData);
+
+        return 1L;
+    }
+
+    @Override
+    public Long reduceInit() {
+        return 0L;
+    }
+
+    @Override
+    public Long reduce(Long value, Long sum) {
+        return sum + value;
+    }
+
+    @Override
+    public void onTraversalDone(Long result) {
+        super.onTraversalDone(result);
+        out.println("Interval\tCallStatus\tCOV\tAVG");
+        for (GenomeLoc interval : intervalList) {
+            IntervalStatistics stats = intervalMap.get(interval);
+            out.println(String.format("%s\t%s\t%d\t%f", interval, stats.callableStatus(), stats.totalCoverage(), stats.averageCoverage()));
+        }
+    }
+
+    private GenomeLoc createIntervalBefore(GenomeLoc interval) {
+        int start = Math.max(interval.getStart() - expandInterval, 0);
+        int stop = Math.max(interval.getStart() - 1, 0);
+        return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
+    }
+
+    private GenomeLoc createIntervalAfter(GenomeLoc interval) {
+        int contigLimit = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(interval.getContigIndex()).getSequenceLength();
+        int start = Math.min(interval.getStop() + 1, contigLimit);
+        int stop = Math.min(interval.getStop() + expandInterval, contigLimit);
+        return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
+    }
+
+    private void addAndExpandIntervalToLists(GenomeLoc interval) {
+        if (expandInterval > 0) {
+            GenomeLoc before = createIntervalBefore(interval);
+            GenomeLoc after = createIntervalAfter(interval);
+            intervalList.add(before);
+            intervalList.add(after);
+            intervalMap.put(before, new IntervalStatistics(before, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
+            intervalMap.put(after, new IntervalStatistics(after, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
+        }
+        intervalList.add(interval);
+        intervalMap.put(interval, new IntervalStatistics(interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java
new file mode 100644
index 000000000..5620c3902
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java
@@ -0,0 +1,34 @@
+package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
+
+/**
+ * The definition of a locus for the DiagnoseTargets walker statistics calculation
+ *
+ * @author Mauricio Carneiro
+ * @since 2/3/12
+ */
+class IntervalStatisticLocus {
+    private final byte[] mappingQuality;
+    private final byte[] baseQuality;
+    private final int coverage;
+    private final int rawCoverage;
+
+    public IntervalStatisticLocus(byte[] mappingQuality, byte[] baseQuality, int coverage, int rawCoverage) {
+        this.mappingQuality = mappingQuality;
+        this.baseQuality = baseQuality;
+        this.coverage = coverage;
+        this.rawCoverage = rawCoverage;
+    }
+
+    public IntervalStatisticLocus() {
+        this(new byte[1], new byte[1], 0, 0);
+    }
+
+    public int getCoverage() {
+        return coverage;
+    }
+
+    public int getRawCoverage() {
+        return rawCoverage;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java
new file mode 100644
index 000000000..8ee5f76fb
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java
@@ -0,0 +1,122 @@
+package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * Short one line description of the walker.
+ *
+ * @author Mauricio Carneiro
+ * @since 2/1/12
+ */
+class IntervalStatistics {
+    private final GenomeLoc interval;
+    private final ArrayList<IntervalStatisticLocus> loci;
+
+    private final int minimumCoverageThreshold;
+    private final int maximumCoverageThreshold;
+    private final int minimumMappingQuality;
+    private final int minimumBaseQuality;
+
+    private int preComputedTotalCoverage = -1;         // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
+
+    private IntervalStatistics(GenomeLoc interval, ArrayList<IntervalStatisticLocus> loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
+        this.interval = interval;
+        this.loci = loci;
+        this.minimumCoverageThreshold = minimumCoverageThreshold;
+        this.maximumCoverageThreshold = maximumCoverageThreshold;
+        this.minimumMappingQuality = minimumMappingQuality;
+        this.minimumBaseQuality = minimumBaseQuality;
+    }
+
+    public IntervalStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
+        this(interval, new ArrayList<IntervalStatisticLocus>(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality);
+
+        // Initialize every loci (this way we don't have to worry about non-existent loci in the object
+        for (int i = 0; i < interval.size(); i++)
+            this.loci.add(i, new IntervalStatisticLocus());
+
+    }
+
+    public long totalCoverage() {
+        if (preComputedTotalCoverage < 0)
+            calculateTotalCoverage();
+        return preComputedTotalCoverage;
+    }
+
+    public double averageCoverage() {
+        if (preComputedTotalCoverage < 0)
+            calculateTotalCoverage();
+        return (double) preComputedTotalCoverage / loci.size();
+    }
+
+    /**
+     * Calculates the callable status of the entire interval
+     *
+     * @return the callable status of the entire interval
+     */
+    public CallableStatus callableStatus() {
+        long max = -1;
+        CallableStatus maxCallableStatus = null;
+        HashMap<CallableStatus, Integer> statusCounts = new HashMap<CallableStatus, Integer>(CallableStatus.values().length);
+
+        // initialize the statusCounts with all callable states
+        for (CallableStatus key : CallableStatus.values())
+            statusCounts.put(key, 0);
+
+        // calculate the callable status for each locus
+        for (int i = 0; i < loci.size(); i++) {
+            CallableStatus status = callableStatus(i);
+            int count = statusCounts.get(status) + 1;
+            statusCounts.put(status, count);
+
+            if (count > max) {
+                max = count;
+                maxCallableStatus = status;
+            }
+        }
+
+        return maxCallableStatus;
+    }
+
+    public void addLocus(GenomeLoc locus, IntervalStatisticLocus locusData) {
+        if (!interval.containsP(locus))
+            throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus));
+
+        int locusIndex = locus.getStart() - interval.getStart();
+
+        loci.add(locusIndex, locusData);
+    }
+
+    /**
+     * returns the callable status of this locus without taking the reference base into account.
+     *
+     * @param locusIndex location in the genome to inquire (only one locus)
+     * @return the callable status of a locus
+     */
+    private CallableStatus callableStatus(int locusIndex) {
+        if (loci.get(locusIndex).getCoverage() > maximumCoverageThreshold)
+            return CallableStatus.EXCESSIVE_COVERAGE;
+
+        if (loci.get(locusIndex).getCoverage() >= minimumCoverageThreshold)
+            return CallableStatus.CALLABLE;
+
+        if (loci.get(locusIndex).getRawCoverage() >= minimumCoverageThreshold)
+            return CallableStatus.POOR_QUALITY;
+
+        if (loci.get(locusIndex).getRawCoverage() > 0)
+            return CallableStatus.LOW_COVERAGE;
+
+        return CallableStatus.NO_COVERAGE;
+    }
+
+    private void calculateTotalCoverage() {
+        preComputedTotalCoverage = 0;
+        for (IntervalStatisticLocus locus : loci)
+            preComputedTotalCoverage += locus.getCoverage();
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index 814cb2765..a4e9fc7ed 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -49,7 +49,6 @@ public class MathUtils {
      * high precision 
      */
 
-
     /**
      * Private constructor.  No instantiating this class!
      */
@@ -60,48 +59,48 @@ public class MathUtils {
     // under/overflow checking, so this shouldn't be used in the general case (but is fine
     // if one is already make those checks before calling in to the rounding).
     public static int fastRound(double d) {
-	return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d);
+        return (d > 0) ? (int) (d + 0.5d) : (int) (d - 0.5d);
     }
 
     public static double approximateLog10SumLog10(final double[] vals) {
-	return approximateLog10SumLog10(vals, vals.length);
+        return approximateLog10SumLog10(vals, vals.length);
     }
 
     public static double approximateLog10SumLog10(final double[] vals, final int endIndex) {
 
-	final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);
-	double approxSum = vals[maxElementIndex];
-        if ( approxSum == Double.NEGATIVE_INFINITY )
+        final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);
+        double approxSum = vals[maxElementIndex];
+        if (approxSum == Double.NEGATIVE_INFINITY)
             return approxSum;
 
-        for ( int i = 0; i < endIndex; i++ ) {
-	    if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY )
-		continue;
+        for (int i = 0; i < endIndex; i++) {
+            if (i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY)
+                continue;
 
-	    final double diff = approxSum - vals[i];
-	    if ( diff < MathUtils.MAX_JACOBIAN_TOLERANCE ) {
-		// See notes from the 2-inout implementation below
-		final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
-		approxSum += MathUtils.jacobianLogTable[ind];
-	    }
-	}
+            final double diff = approxSum - vals[i];
+            if (diff < MathUtils.MAX_JACOBIAN_TOLERANCE) {
+                // See notes from the 2-inout implementation below
+                final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
+                approxSum += MathUtils.jacobianLogTable[ind];
+            }
+        }
 
         return approxSum;
     }
 
     public static double approximateLog10SumLog10(double small, double big) {
         // make sure small is really the smaller value
-        if ( small > big ) {
+        if (small > big) {
             final double t = big;
             big = small;
             small = t;
         }
 
-        if ( small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
+        if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY)
             return big;
 
-	final double diff = big - small;
-        if ( diff >= MathUtils.MAX_JACOBIAN_TOLERANCE )
+        final double diff = big - small;
+        if (diff >= MathUtils.MAX_JACOBIAN_TOLERANCE)
             return big;
 
         // OK, so |y-x| < tol: we use the following identity then:
@@ -137,7 +136,7 @@ public class MathUtils {
 
         return size;
     }
-    
+
     public static double average(Collection<Integer> x) {
         return (double) sum(x) / x.size();
     }
@@ -145,7 +144,8 @@ public class MathUtils {
     public static double average(Collection<Number> numbers, boolean ignoreNan) {
         if (ignoreNan) {
             return sum(numbers, true) / nonNanSize(numbers);
-        } else {
+        }
+        else {
             return sum(numbers, false) / nonNanSize(numbers);
         }
     }
@@ -176,7 +176,8 @@ public class MathUtils {
 
     public static double sum(double[] values) {
         double s = 0.0;
-        for (double v : values) s += v;
+        for (double v : values)
+            s += v;
         return s;
     }
 
@@ -187,7 +188,6 @@ public class MathUtils {
         return total;
     }
 
-
     /**
      * Calculates the log10 cumulative sum of an array with log10 probabilities
      *
@@ -229,21 +229,23 @@ public class MathUtils {
 
     public static double sumDoubles(List<Double> values) {
         double s = 0.0;
-        for (double v : values) s += v;
+        for (double v : values)
+            s += v;
         return s;
     }
 
     public static int sumIntegers(List<Integer> values) {
         int s = 0;
-        for (int v : values) s += v;
+        for (int v : values)
+            s += v;
         return s;
     }
 
     public static double sumLog10(double[] log10values) {
         return Math.pow(10.0, log10sumLog10(log10values));
-//        double s = 0.0;
-//        for ( double v : log10values) s += Math.pow(10.0, v);
-//        return s;
+        //        double s = 0.0;
+        //        for ( double v : log10values) s += Math.pow(10.0, v);
+        //        return s;
     }
 
     public static double log10sumLog10(double[] log10values) {
@@ -456,7 +458,6 @@ public class MathUtils {
         return Math.sqrt(rms);
     }
 
-
     /**
      * calculate the Root Mean Square of an array of integers
      *
@@ -517,7 +518,6 @@ public class MathUtils {
         return result;
     }
 
-
     /**
      * normalizes the log10-based array.  ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
      *
@@ -554,7 +554,8 @@ public class MathUtils {
             sum += normalized[i];
         for (int i = 0; i < array.length; i++) {
             double x = normalized[i] / sum;
-            if (takeLog10OfOutput) x = Math.log10(x);
+            if (takeLog10OfOutput)
+                x = Math.log10(x);
             normalized[i] = x;
         }
 
@@ -576,7 +577,8 @@ public class MathUtils {
             sum += normalized[i];
         for (int i = 0; i < array.size(); i++) {
             double x = normalized[i] / sum;
-            if (takeLog10OfOutput) x = Math.log10(x);
+            if (takeLog10OfOutput)
+                x = Math.log10(x);
             normalized[i] = x;
         }
 
@@ -598,11 +600,12 @@ public class MathUtils {
     }
 
     public static int maxElementIndex(final double[] array) {
-	return maxElementIndex(array, array.length);
+        return maxElementIndex(array, array.length);
     }
 
     public static int maxElementIndex(final double[] array, final int endIndex) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
         for (int i = 0; i < endIndex; i++) {
@@ -614,11 +617,12 @@ public class MathUtils {
     }
 
     public static int maxElementIndex(final int[] array) {
-	return maxElementIndex(array, array.length);
+        return maxElementIndex(array, array.length);
     }
 
     public static int maxElementIndex(final int[] array, int endIndex) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
         for (int i = 0; i < endIndex; i++) {
@@ -646,7 +650,8 @@ public class MathUtils {
     }
 
     public static int minElementIndex(double[] array) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
 
         int minI = -1;
         for (int i = 0; i < array.length; i++) {
@@ -658,7 +663,8 @@ public class MathUtils {
     }
 
     public static int minElementIndex(byte[] array) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
 
         int minI = -1;
         for (int i = 0; i < array.length; i++) {
@@ -670,7 +676,8 @@ public class MathUtils {
     }
 
     public static int minElementIndex(int[] array) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
 
         int minI = -1;
         for (int i = 0; i < array.length; i++) {
@@ -682,20 +689,26 @@ public class MathUtils {
     }
 
     public static int arrayMaxInt(List<Integer> array) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
-        if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
+        if (array.size() == 0)
+            throw new IllegalArgumentException("Array size cannot be 0!");
 
         int m = array.get(0);
-        for (int e : array) m = Math.max(m, e);
+        for (int e : array)
+            m = Math.max(m, e);
         return m;
     }
 
     public static double arrayMaxDouble(List<Double> array) {
-        if (array == null) throw new IllegalArgumentException("Array cannot be null!");
-        if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!");
+        if (array == null)
+            throw new IllegalArgumentException("Array cannot be null!");
+        if (array.size() == 0)
+            throw new IllegalArgumentException("Array size cannot be 0!");
 
         double m = array.get(0);
-        for (double e : array) m = Math.max(m, e);
+        for (double e : array)
+            m = Math.max(m, e);
         return m;
     }
 
@@ -816,7 +829,6 @@ public class MathUtils {
         return permutation;
     }
 
-
     public static int[] permuteArray(int[] array, Integer[] permutation) {
         int[] output = new int[array.length];
         for (int i = 0; i < output.length; i++) {
@@ -857,7 +869,6 @@ public class MathUtils {
         return output;
     }
 
-
     /**
      * Draw N random elements from list.
      */
@@ -923,7 +934,8 @@ public class MathUtils {
     public static <T> int countOccurrences(T x, List<T> l) {
         int count = 0;
         for (T y : l) {
-            if (x.equals(y)) count++;
+            if (x.equals(y))
+                count++;
         }
 
         return count;
@@ -1031,9 +1043,11 @@ public class MathUtils {
         for (Comparable y : list) {
             if (x.compareTo(y) > 0) {
                 lessThanX.add(y);
-            } else if (x.compareTo(y) < 0) {
+            }
+            else if (x.compareTo(y) < 0) {
                 greaterThanX.add(y);
-            } else
+            }
+            else
                 equalToX.add(y);
         }
 
@@ -1046,7 +1060,6 @@ public class MathUtils {
 
     }
 
-
     public static Object getMedian(List<Comparable> list) {
         return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
     }
@@ -1076,10 +1089,12 @@ public class MathUtils {
             if (quality < qk) {
                 lessThanQReads.add(read);
                 lessThanQOffsets.add(offset);
-            } else if (quality > qk) {
+            }
+            else if (quality > qk) {
                 greaterThanQReads.add(read);
                 greaterThanQOffsets.add(offset);
-            } else {
+            }
+            else {
                 equalToQReads.add(reads.get(iter));
             }
         }
@@ -1100,7 +1115,7 @@ public class MathUtils {
     public static long sum(Collection<Integer> x) {
         long sum = 0;
         for (int v : x)
-            sum += v;        
+            sum += v;
         return sum;
     }
 
@@ -1209,8 +1224,7 @@ public class MathUtils {
             log10Cache[k] = Math.log10(k);
 
         for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
-            jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k)
-                    * JACOBIAN_LOG_TABLE_STEP));
+            jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
 
         }
     }
@@ -1257,7 +1271,8 @@ public class MathUtils {
         else if (diff >= 0) {
             int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
             return x + jacobianLogTable[ind];
-        } else {
+        }
+        else {
             int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5);
             return y + jacobianLogTable[ind];
         }
@@ -1298,71 +1313,7 @@ public class MathUtils {
     /**
      * Constants to simplify the log gamma function calculation.
      */
-    private static final double
-            zero = 0.0,
-            one = 1.0,
-            half = .5,
-            a0 = 7.72156649015328655494e-02,
-            a1 = 3.22467033424113591611e-01,
-            a2 = 6.73523010531292681824e-02,
-            a3 = 2.05808084325167332806e-02,
-            a4 = 7.38555086081402883957e-03,
-            a5 = 2.89051383673415629091e-03,
-            a6 = 1.19270763183362067845e-03,
-            a7 = 5.10069792153511336608e-04,
-            a8 = 2.20862790713908385557e-04,
-            a9 = 1.08011567247583939954e-04,
-            a10 = 2.52144565451257326939e-05,
-            a11 = 4.48640949618915160150e-05,
-            tc = 1.46163214496836224576e+00,
-            tf = -1.21486290535849611461e-01,
-            tt = -3.63867699703950536541e-18,
-            t0 = 4.83836122723810047042e-01,
-            t1 = -1.47587722994593911752e-01,
-            t2 = 6.46249402391333854778e-02,
-            t3 = -3.27885410759859649565e-02,
-            t4 = 1.79706750811820387126e-02,
-            t5 = -1.03142241298341437450e-02,
-            t6 = 6.10053870246291332635e-03,
-            t7 = -3.68452016781138256760e-03,
-            t8 = 2.25964780900612472250e-03,
-            t9 = -1.40346469989232843813e-03,
-            t10 = 8.81081882437654011382e-04,
-            t11 = -5.38595305356740546715e-04,
-            t12 = 3.15632070903625950361e-04,
-            t13 = -3.12754168375120860518e-04,
-            t14 = 3.35529192635519073543e-04,
-            u0 = -7.72156649015328655494e-02,
-            u1 = 6.32827064025093366517e-01,
-            u2 = 1.45492250137234768737e+00,
-            u3 = 9.77717527963372745603e-01,
-            u4 = 2.28963728064692451092e-01,
-            u5 = 1.33810918536787660377e-02,
-            v1 = 2.45597793713041134822e+00,
-            v2 = 2.12848976379893395361e+00,
-            v3 = 7.69285150456672783825e-01,
-            v4 = 1.04222645593369134254e-01,
-            v5 = 3.21709242282423911810e-03,
-            s0 = -7.72156649015328655494e-02,
-            s1 = 2.14982415960608852501e-01,
-            s2 = 3.25778796408930981787e-01,
-            s3 = 1.46350472652464452805e-01,
-            s4 = 2.66422703033638609560e-02,
-            s5 = 1.84028451407337715652e-03,
-            s6 = 3.19475326584100867617e-05,
-            r1 = 1.39200533467621045958e+00,
-            r2 = 7.21935547567138069525e-01,
-            r3 = 1.71933865632803078993e-01,
-            r4 = 1.86459191715652901344e-02,
-            r5 = 7.77942496381893596434e-04,
-            r6 = 7.32668430744625636189e-06,
-            w0 = 4.18938533204672725052e-01,
-            w1 = 8.33333333333329678849e-02,
-            w2 = -2.77777777728775536470e-03,
-            w3 = 7.93650558643019558500e-04,
-            w4 = -5.95187557450339963135e-04,
-            w5 = 8.36339918996282139126e-04,
-            w6 = -1.63092934096575273989e-03;
+    private static final double zero = 0.0, one = 1.0, half = .5, a0 = 7.72156649015328655494e-02, a1 = 3.22467033424113591611e-01, a2 = 6.73523010531292681824e-02, a3 = 2.05808084325167332806e-02, a4 = 7.38555086081402883957e-03, a5 = 2.89051383673415629091e-03, a6 = 1.19270763183362067845e-03, a7 = 5.10069792153511336608e-04, a8 = 2.20862790713908385557e-04, a9 = 1.08011567247583939954e-04, a10 = 2.52144565451257326939e-05, a11 = 4.48640949618915160150e-05, tc = 1.46163214496836224576e+00, tf = -1.21486290535849611461e-01, tt = -3.63867699703950536541e-18, t0 = 4.83836122723810047042e-01, t1 = -1.47587722994593911752e-01, t2 = 6.46249402391333854778e-02, t3 = -3.27885410759859649565e-02, t4 = 1.79706750811820387126e-02, t5 = -1.03142241298341437450e-02, t6 = 6.10053870246291332635e-03, t7 = -3.68452016781138256760e-03, t8 = 2.25964780900612472250e-03, t9 = -1.40346469989232843813e-03, t10 = 8.81081882437654011382e-04, t11 = -5.38595305356740546715e-04, t12 = 3.15632070903625950361e-04, t13 = -3.12754168375120860518e-04, t14 = 3.35529192635519073543e-04, u0 = -7.72156649015328655494e-02, u1 = 6.32827064025093366517e-01, u2 = 1.45492250137234768737e+00, u3 = 9.77717527963372745603e-01, u4 = 2.28963728064692451092e-01, u5 = 1.33810918536787660377e-02, v1 = 2.45597793713041134822e+00, v2 = 2.12848976379893395361e+00, v3 = 7.69285150456672783825e-01, v4 = 1.04222645593369134254e-01, v5 = 3.21709242282423911810e-03, s0 = -7.72156649015328655494e-02, s1 = 2.14982415960608852501e-01, s2 = 3.25778796408930981787e-01, s3 = 1.46350472652464452805e-01, s4 = 2.66422703033638609560e-02, s5 = 1.84028451407337715652e-03, s6 = 3.19475326584100867617e-05, r1 = 1.39200533467621045958e+00, r2 = 7.21935547567138069525e-01, r3 = 1.71933865632803078993e-01, r4 = 1.86459191715652901344e-02, r5 = 7.77942496381893596434e-04, r6 = 7.32668430744625636189e-06, w0 = 4.18938533204672725052e-01, w1 = 8.33333333333329678849e-02, w2 = -2.77777777728775536470e-03, w3 = 7.93650558643019558500e-04, w4 = -5.95187557450339963135e-04, w5 = 8.36339918996282139126e-04, w6 = -1.63092934096575273989e-03;
 
     /**
      * Efficient rounding functions to simplify the log gamma function calculation
@@ -1393,14 +1344,17 @@ public class MathUtils {
 
         /* purge off +-inf, NaN, +-0, and negative arguments */
         int ix = hx & 0x7fffffff;
-        if (ix >= 0x7ff00000) return Double.POSITIVE_INFINITY;
-        if ((ix | lx) == 0 || hx < 0) return Double.NaN;
+        if (ix >= 0x7ff00000)
+            return Double.POSITIVE_INFINITY;
+        if ((ix | lx) == 0 || hx < 0)
+            return Double.NaN;
         if (ix < 0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
             return -Math.log(x);
         }
 
         /* purge off 1 and 2 */
-        if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0;
+        if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0))
+            r = 0;
             /* for x < 2.0 */
         else if (ix < 0x40000000) {
             if (ix <= 0x3feccccc) {     /* lgamma(x) = lgamma(x+1)-log(x) */
@@ -1408,22 +1362,27 @@ public class MathUtils {
                 if (ix >= 0x3FE76944) {
                     y = one - x;
                     i = 0;
-                } else if (ix >= 0x3FCDA661) {
+                }
+                else if (ix >= 0x3FCDA661) {
                     y = x - (tc - one);
                     i = 1;
-                } else {
+                }
+                else {
                     y = x;
                     i = 2;
                 }
-            } else {
+            }
+            else {
                 r = zero;
                 if (ix >= 0x3FFBB4C3) {
                     y = 2.0 - x;
                     i = 0;
-                } /* [1.7316,2] */ else if (ix >= 0x3FF3B4C4) {
+                } /* [1.7316,2] */
+                else if (ix >= 0x3FF3B4C4) {
                     y = x - tc;
                     i = 1;
-                } /* [1.23,1.73] */ else {
+                } /* [1.23,1.73] */
+                else {
                     y = x - one;
                     i = 2;
                 }
@@ -1451,7 +1410,8 @@ public class MathUtils {
                     p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
                     r += (-0.5 * y + p1 / p2);
             }
-        } else if (ix < 0x40200000) {             /* x < 8.0 */
+        }
+        else if (ix < 0x40200000) {             /* x < 8.0 */
             i = (int) x;
             t = zero;
             y = x - (double) i;
@@ -1474,13 +1434,15 @@ public class MathUtils {
                     break;
             }
             /* 8.0 <= x < 2**58 */
-        } else if (ix < 0x43900000) {
+        }
+        else if (ix < 0x43900000) {
             t = Math.log(x);
             z = one / x;
             y = z * z;
             w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
             r = (x - half) * (t - one) + w;
-        } else
+        }
+        else
             /* 2**58 <= x <= inf */
             r = x * (Math.log(x) - one);
         return r;
@@ -1515,7 +1477,6 @@ public class MathUtils {
         return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k);
     }
 
-
     /**
      * Calculates the log10 of the multinomial coefficient. Designed to prevent
      * overflows even with very large numbers.
@@ -1559,7 +1520,6 @@ public class MathUtils {
         return log10Gamma(x + 1);
     }
 
-
     /**
      * Adds two arrays together and returns a new array with the sum.
      *
@@ -1597,17 +1557,18 @@ public class MathUtils {
 
     /**
      * Vector operations
+     *
      * @param v1 first numerical array
      * @param v2 second numerical array
-      * @return a new array with the elements added
+     * @return a new array with the elements added
      */
     public static <E extends Number> Double[] vectorSum(E v1[], E v2[]) {
         if (v1.length != v2.length)
             throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
 
         Double[] result = new Double[v1.length];
-        for (int k=0; k < v1.length; k++)
-            result[k] = v1[k].doubleValue()+v2[k].doubleValue();
+        for (int k = 0; k < v1.length; k++)
+            result[k] = v1[k].doubleValue() + v2[k].doubleValue();
 
         return result;
     }
@@ -1615,19 +1576,19 @@ public class MathUtils {
     public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {
 
         Double result[] = new Double[v1.length];
-        for (int k=0; k < v1.length; k++)
-            result[k] = a.doubleValue()*v1[k].doubleValue();
+        for (int k = 0; k < v1.length; k++)
+            result[k] = a.doubleValue() * v1[k].doubleValue();
 
         return result;
     }
 
-    public static <E extends Number> Double  dotProduct(E[] v1, E[] v2) {
+    public static <E extends Number> Double dotProduct(E[] v1, E[] v2) {
         if (v1.length != v2.length)
             throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
 
         Double result = 0.0;
-        for (int k=0; k < v1.length; k++)
-            result +=  v1[k].doubleValue() *v2[k].doubleValue();
+        for (int k = 0; k < v1.length; k++)
+            result += v1[k].doubleValue() * v2[k].doubleValue();
 
         return result;
 
@@ -1635,7 +1596,7 @@ public class MathUtils {
 
     public static double[] vectorLog10(double v1[]) {
         double result[] = new double[v1.length];
-        for (int k=0; k < v1.length; k++)
+        for (int k = 0; k < v1.length; k++)
             result[k] = Math.log10(v1[k]);
 
         return result;
@@ -1645,7 +1606,7 @@ public class MathUtils {
     // todo - silly overloading, just because Java can't unbox/box arrays of primitive types, and we can't do generics with primitive types!
     public static Double[] vectorLog10(Double v1[]) {
         Double result[] = new Double[v1.length];
-        for (int k=0; k < v1.length; k++)
+        for (int k = 0; k < v1.length; k++)
             result[k] = Math.log10(v1[k]);
 
         return result;

From 894d3340be2a95a15c6c91f4785e8d8f1ef5776b Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Fri, 3 Feb 2012 17:13:52 -0500
Subject: [PATCH 06/67] Active Region Traversal should use GATKSAMRecords
 everywhere instead of SAMRecords. misc cleanup.

---
 .../analyzecovariates/AnalyzeCovariates.java  |  7 +++----
 .../traversals/TraverseActiveRegions.java     | 21 +++++++++----------
 .../recalibration/CountCovariatesWalker.java  |  2 +-
 .../TableRecalibrationWalker.java             |  2 +-
 .../broadinstitute/sting/utils/baq/BAQ.java   |  2 +-
 5 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
index a399867fa..a99959341 100755
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@@ -139,11 +139,11 @@ public class AnalyzeCovariates extends CommandLineProgram {
      */
     @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
     private int MAX_HISTOGRAM_VALUE = 0;
+
     @Hidden
     @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
     private boolean DO_INDEL_QUALITY = false;
 
-
     /////////////////////////////
     // Private Member Variables
     /////////////////////////////
@@ -274,7 +274,6 @@ public class AnalyzeCovariates extends CommandLineProgram {
         RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
         // Add that datum to all the collapsed tables which will be used in the sequential calculation
         dataManager.addToAllTables( key, datum, IGNORE_QSCORES_LESS_THAN );
-
     }
 
     private void writeDataTables() {
@@ -341,7 +340,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
 
                 // for each covariate
                 for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
-                    Covariate cov = requestedCovariates.get(iii);
+                    final Covariate cov = requestedCovariates.get(iii);
                     final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
                     if (DO_INDEL_QUALITY) {
                         RScriptExecutor executor = new RScriptExecutor();
@@ -349,7 +348,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
                         // The second argument is the name of the covariate in order to make the plots look nice
                         executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
                         executor.exec();
-                    }   else {
+                    } else {
                         if( iii == 1 ) {
                             // Analyze reported quality
                             RScriptExecutor executor = new RScriptExecutor();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index ce8cb557b..58c2df877 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -1,6 +1,5 @@
 package org.broadinstitute.sting.gatk.traversals;
 
-import net.sf.samtools.SAMRecord;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.WalkerManager;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@@ -29,7 +28,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
     protected static Logger logger = Logger.getLogger(TraversalEngine.class);
 
     private final Queue<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
-    private final LinkedHashSet<SAMRecord> myReads = new LinkedHashSet<SAMRecord>();
+    private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
 
     @Override
     protected String getTraversalType() {
@@ -101,7 +100,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
 
                 // Grab all the previously unseen reads from this pileup and add them to the massive read list
                 for( final PileupElement p : locus.getBasePileup() ) {
-                    final SAMRecord read = p.getRead();
+                    final GATKSAMRecord read = p.getRead();
                     if( !myReads.contains(read) ) {
                         myReads.add(read);
                     }
@@ -111,7 +110,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 //   which active regions in the work queue are now safe to process
                 if( !locusView.hasNext() ) {
                     for( final PileupElement p : locus.getBasePileup() ) {
-                        final SAMRecord read = p.getRead();
+                        final GATKSAMRecord read = p.getRead();
                         if( !myReads.contains(read) ) {
                             myReads.add(read);
                         }
@@ -156,9 +155,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
         return sum;
     }
 
-    private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<SAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
-        final ArrayList<SAMRecord> placedReads = new ArrayList<SAMRecord>();
-        for( final SAMRecord read : reads ) {
+    private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
+        final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
+        for( final GATKSAMRecord read : reads ) {
             final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
             if( activeRegion.getLocation().overlapsP( readLoc ) ) {
                 // The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
@@ -170,22 +169,22 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                         bestRegion = otherRegionToTest;
                     }
                 }
-                bestRegion.add( (GATKSAMRecord) read );
+                bestRegion.add( read );
 
                 // The read is also added to all other regions in which it overlaps but marked as non-primary
                 if( walker.wantsNonPrimaryReads() ) {
                     if( !bestRegion.equals(activeRegion) ) {
-                        activeRegion.add( (GATKSAMRecord) read );
+                        activeRegion.add( read );
                     }
                     for( final ActiveRegion otherRegionToTest : workQueue ) {
                         if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
-                            otherRegionToTest.add( (GATKSAMRecord) read );
+                            otherRegionToTest.add( read );
                         }
                     }
                 }
                 placedReads.add( read );
             } else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
-                activeRegion.add( (GATKSAMRecord) read );
+                activeRegion.add( read );
             }
         }
         reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index fdfb29da6..f6f05d39c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -354,7 +354,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
         if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
             // For each read at this locus
             for( final PileupElement p : context.getBasePileup() ) {
-                final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead();
+                final GATKSAMRecord gatkRead = p.getRead();
                 int offset = p.getOffset();
 
                 if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index 1ce02a3cf..6e214c6bb 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -399,7 +399,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
         //compute all covariate values for this read
         final Comparable[][] covariateValues_offset_x_covar =
-            RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
+            RecalDataManager.computeCovariates(read, requestedCovariates);
 
         // For each base in the read
         for( int offset = 0; offset < read.getReadLength(); offset++ ) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index 4f096f86e..186452294 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -673,7 +673,7 @@ public class BAQ {
     }
 
     /**
-     * Returns true if we don't think this read is eligable for the BAQ calculation.  Examples include non-PF reads,
+     * Returns true if we don't think this read is eligible for the BAQ calculation.  Examples include non-PF reads,
      * duplicates, or unmapped reads.  Used by baqRead to determine if a read should fall through the calculation.
      *
      * @param read

From 5343f8ba67cccb90fd91ad77e09f8bd9a2a2d7f5 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sun, 5 Feb 2012 13:09:03 -0500
Subject: [PATCH 08/67] Initial version of on-the-fly, lazy loading base
 quality score recalibration. It isn't completely hooked up yet but I'm
 committing so Mauricio and Mark can see how I envision it will fit together.
 Look it over and give any feedback. With the exception of the Solid specific
 code we are very very close to being able to remove TableRecalibrationWalker
 from the code base and just replace it with PrintReads -BQSR recal.csv

---
 .../sting/gatk/GenomeAnalysisEngine.java      |  17 +-
 .../arguments/GATKArgumentCollection.java     |   9 +
 .../recalibration/ContextCovariate.java       |  62 ++++
 .../recalibration/RecalDataManager.java       |   5 +-
 .../TableRecalibrationWalker.java             |   1 -
 .../sting/utils/QualityUtils.java             |   8 +
 .../sting/utils/pileup/PileupElement.java     |  17 +-
 .../recalibration/BaseRecalibration.java      | 293 ++++++++++++++++++
 .../sting/utils/sam/GATKSAMRecord.java        |  61 ++++
 9 files changed, 466 insertions(+), 7 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 6140d543a..97d1de1fa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -53,6 +53,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 import java.io.File;
 import java.util.*;
@@ -179,10 +180,18 @@ public class GenomeAnalysisEngine {
      */
     private static final long GATK_RANDOM_SEED = 47382911L;
     private static Random randomGenerator = new Random(GATK_RANDOM_SEED);
-
     public static Random getRandomGenerator() { return randomGenerator; }
     public static void resetRandomGenerator() { randomGenerator.setSeed(GATK_RANDOM_SEED); }
     public static void resetRandomGenerator(long seed) { randomGenerator.setSeed(seed); }
+
+    /**
+     *  Static base quality score recalibration helper object
+     */
+    private static BaseRecalibration baseRecalibration = null;
+    public static BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
+    public static boolean hasBaseRecalibration() { return baseRecalibration != null; }
+    public static void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); }
+
     /**
      * Actually run the GATK with the specified walker.
      *
@@ -205,6 +214,10 @@ public class GenomeAnalysisEngine {
         if (this.getArguments().nonDeterministicRandomSeed)
             resetRandomGenerator(System.currentTimeMillis());
 
+        // if the use specified an input BQSR recalibration table then enable on the fly recalibration
+        if (this.getArguments().RECAL_FILE != null)
+            setBaseRecalibration(this.getArguments().RECAL_FILE);
+
         // Determine how the threads should be divided between CPU vs. IO.
         determineThreadAllocation();
 
@@ -224,7 +237,7 @@ public class GenomeAnalysisEngine {
         // create temp directories as necessary
         initializeTempDirectory();
 
-        // create the output streams                     "
+        // create the output streams
         initializeOutputStreams(microScheduler.getOutputTracker());
 
         Iterable<Shard> shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 08d2c1ad1..206fa5765 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -185,6 +185,15 @@ public class GATKArgumentCollection {
     @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
     public Boolean useOriginalBaseQualities = false;
 
+    /**
+     * After the header, data records occur one per line until the end of the file. The first several items on a line are the
+     * values of the individual covariates and will change depending on which covariates were specified at runtime. The last
+     * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
+     * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
+     */
+    @Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration")
+    public File RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
+
     @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
     public byte defaultBaseQualities = -1;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
new file mode 100644
index 000000000..837062dd2
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.recalibration;
+
+import net.sf.samtools.SAMRecord;
+
+import java.util.Arrays;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 9/26/11
+ */
+
+public class ContextCovariate implements Covariate {
+
+    final int CONTEXT_SIZE = 8;
+    String allN = "";
+
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    public void initialize( final RecalibrationArgumentCollection RAC ) {
+        for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) {
+            allN += "N";
+        }
+    }
+
+    public void getValues(SAMRecord read, Comparable[] comparable) {
+        byte[] bases = read.getReadBases();
+        for(int i = 0; i < read.getReadLength(); i++) {
+            comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) );
+        }
+    }
+
+    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    public final Comparable getValue( final String str ) {
+        return str;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index a0c928afa..66ad1fb9c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -25,7 +25,6 @@
 
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMReadGroupRecord;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMUtils;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -86,14 +85,14 @@ public class RecalDataManager {
         PURGE_READ
     }
 
-    RecalDataManager() {
+    public RecalDataManager() {
         data = new NestedHashMap();
         dataCollapsedReadGroup = null;
         dataCollapsedQualityScore = null;
         dataCollapsedByCovariate = null;
     }
 
-    RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
+    public RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
         if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
             data = null;
             dataCollapsedReadGroup = new NestedHashMap();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index 6e214c6bb..a569aefd2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -181,7 +181,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     /////////////////////////////
     private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
 
-
     //---------------------------------------------------------------------------------------------------------------
     //
     // initialize
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 7ec6a74d7..9722f901b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -55,6 +55,14 @@ public class QualityUtils {
         return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
     }
 
+    static public double[] qualArrayToLog10ErrorProb(byte[] quals) {
+        double[] returnArray = new double[quals.length];
+        for( int iii = 0; iii < quals.length; iii++ ) {
+            returnArray[iii] = ((double) quals[iii])/-10.0;
+        }
+        return returnArray;
+    }
+    
     /**
      * Convert a probability to a quality score.  Note, this is capped at Q40.
      *
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
index 9e2a66f6e..a4830223e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
@@ -27,7 +27,6 @@ public class PileupElement implements Comparable<PileupElement> {
     protected final boolean isBeforeInsertion;
     protected final boolean isNextToSoftClip;
 
-
     /**
      * Creates a new pileup element.
      *
@@ -89,6 +88,14 @@ public class PileupElement implements Comparable<PileupElement> {
     public byte getQual() {
         return getQual(offset);
     }
+    
+    public byte getBaseInsertionQual() {
+        return getBaseInsertionQual(offset);
+    }
+
+    public byte getBaseDeletionQual() {
+        return getBaseDeletionQual(offset);
+    }
 
     public int getMappingQual() {
         return read.getMappingQuality();
@@ -111,6 +118,14 @@ public class PileupElement implements Comparable<PileupElement> {
         return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset];
     }
 
+    protected byte getBaseInsertionQual(final int offset) {
+        return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseInsertionQualities()[offset];
+    }
+
+    protected byte getBaseDeletionQual(final int offset) {
+        return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseDeletionQualities()[offset];
+    }
+
     @Override
     public int compareTo(final PileupElement pileupElement) {
         if (offset < pileupElement.offset)
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
new file mode 100644
index 000000000..2e785043d
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.recalibration;
+
+import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
+import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
+import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.collections.NestedHashMap;
+import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.text.XReadLines;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * Utility methods to facilitate on-the-fly base quality score recalibration.
+ * 
+ * User: rpoplin
+ * Date: 2/4/12
+ */
+
+public class BaseRecalibration {
+
+    public enum BaseRecalibrationType {
+        BASE_SUBSTITUTION,
+        BASE_INSERTION,
+        BASE_DELETION
+    }
+
+    private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
+    private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
+    public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
+    public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
+    public static final String EOF_MARKER = "EOF";
+    private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
+    private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
+
+    public BaseRecalibration( final File RECAL_FILE ) {
+        // Get a list of all available covariates
+        final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
+
+        int lineNumber = 0;
+        boolean foundAllCovariates = false;
+
+        // Read in the data from the csv file and populate the data map and covariates list
+        boolean sawEOF = false;
+        try {
+            for ( String line : new XReadLines(RECAL_FILE) ) {
+                lineNumber++;
+                if ( EOF_MARKER.equals(line) ) {
+                    sawEOF = true;
+                } else if( COMMENT_PATTERN.matcher(line).matches() )  {
+                    ; // Skip over the comment lines, (which start with '#')
+                }
+                // Read in the covariates that were used from the input file
+                else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
+                    if( foundAllCovariates ) {
+                        throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
+                    } else { // Found the covariate list in input file, loop through all of them and instantiate them
+                        String[] vals = line.split(",");
+                        for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
+                            boolean foundClass = false;
+                            for( Class<?> covClass : classes ) {
+                                if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
+                                    foundClass = true;
+                                    try {
+                                        Covariate covariate = (Covariate)covClass.newInstance();
+                                        requestedCovariates.add( covariate );
+                                    } catch (Exception e) {
+                                        throw new DynamicClassResolutionException(covClass, e);
+                                    }
+
+                                }
+                            }
+
+                            if( !foundClass ) {
+                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
+                            }
+                        }
+                    }
+
+                } else { // Found a line of data
+                    if( !foundAllCovariates ) {
+                        foundAllCovariates = true;
+
+                        // At this point all the covariates should have been found and initialized
+                        if( requestedCovariates.size() < 2 ) {
+                            throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
+                        }
+
+                        final boolean createCollapsedTables = true;
+
+                        // Initialize any covariate member variables using the shared argument collection
+                        for( Covariate cov : requestedCovariates ) {
+                            cov.initialize( null ); // BUGBUG: do any of the used covariates actually need the RecalibrationArgumentCollection?
+                        }
+                        // Initialize the data hashMaps
+                        dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
+
+                    }
+                    addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
+                }
+            }
+
+        } catch ( FileNotFoundException e ) {
+            throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
+        } catch ( NumberFormatException e ) {
+            throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
+        }
+
+        if ( !sawEOF ) {
+            final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
+            throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
+        }
+
+        if( dataManager == null ) {
+            throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
+        }
+
+        dataManager.generateEmpiricalQualities( 1, MAX_QUALITY_SCORE );
+    }
+    
+    /**
+     * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
+     * @param line A line of CSV data read from the recalibration table data file
+     */
+    private void addCSVData(final File file, final String line) {
+        final String[] vals = line.split(",");
+
+        // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
+        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
+            throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
+                    " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
+        }
+
+        final Object[] key = new Object[requestedCovariates.size()];
+        Covariate cov;
+        int iii;
+        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
+            cov = requestedCovariates.get( iii );
+            key[iii] = cov.getValue( vals[iii] );
+        }
+
+        // Create a new datum using the number of observations, number of mismatches, and reported quality score
+        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
+        // Add that datum to all the collapsed tables which will be used in the sequential calculation
+        dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
+    }
+    
+    public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals ) {
+
+        final byte[] recalQuals = originalQuals.clone();
+        
+        //compute all covariate values for this read
+        final Comparable[][] covariateValues_offset_x_covar =
+                RecalDataManager.computeCovariates(read, requestedCovariates);
+    
+        // For each base in the read
+        for( int offset = 0; offset < read.getReadLength(); offset++ ) {
+    
+            final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
+    
+            Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
+            if(qualityScore == null)
+            {
+                qualityScore = performSequentialQualityCalculation( fullCovariateKey );
+                qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
+            }
+    
+            recalQuals[offset] = qualityScore;
+        }
+    
+        preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
+        
+        return recalQuals;
+    }
+
+    /**
+     * Implements a serial recalibration of the reads using the combinational table.
+     * First, we perform a positional recalibration, and then a subsequent dinuc correction.
+     *
+     * Given the full recalibration table, we perform the following preprocessing steps:
+     *
+     *   - calculate the global quality score shift across all data [DeltaQ]
+     *   - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
+     *      -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
+     *   - The final shift equation is:
+     *
+     *      Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
+     * @param key The list of Comparables that were calculated from the covariates
+     * @return A recalibrated quality score as a byte
+     */
+    private byte performSequentialQualityCalculation( final Object... key ) {
+
+        final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
+        final Object[] readGroupCollapsedKey = new Object[1];
+        final Object[] qualityScoreCollapsedKey = new Object[2];
+        final Object[] covariateCollapsedKey = new Object[3];
+
+        // The global quality shift (over the read group only)
+        readGroupCollapsedKey[0] = key[0];
+        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
+        double globalDeltaQ = 0.0;
+        if( globalRecalDatum != null ) {
+            final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
+            final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
+            globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
+        }
+
+        // The shift in quality between reported and empirical
+        qualityScoreCollapsedKey[0] = key[0];
+        qualityScoreCollapsedKey[1] = key[1];
+        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
+        double deltaQReported = 0.0;
+        if( qReportedRecalDatum != null ) {
+            final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
+            deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
+        }
+
+        // The shift in quality due to each covariate by itself in turn
+        double deltaQCovariates = 0.0;
+        double deltaQCovariateEmpirical;
+        covariateCollapsedKey[0] = key[0];
+        covariateCollapsedKey[1] = key[1];
+        for( int iii = 2; iii < key.length; iii++ ) {
+            covariateCollapsedKey[2] =  key[iii]; // The given covariate
+            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
+            if( covariateRecalDatum != null ) {
+                deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
+                deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
+            }
+        }
+
+        final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
+        return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
+
+        // Verbose printouts used to validate with old recalibrator
+        //if(key.contains(null)) {
+        //    System.out.println( key  + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
+        //                 qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
+        //}
+        //else {
+        //    System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
+        //                 key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
+        //}
+
+        //return newQualityByte;
+    }
+
+    /**
+     * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
+     * @param originalQuals The list of original base quality scores
+     * @param recalQuals A list of the new recalibrated quality scores
+     */
+    private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
+        for( int iii = 0; iii < recalQuals.length; iii++ ) {
+            if( originalQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ) { //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
+                recalQuals[iii] = originalQuals[iii];
+            }
+        }
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 03b794ae3..e9b46ac24 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -25,8 +25,10 @@
 package org.broadinstitute.sting.utils.sam;
 
 import net.sf.samtools.*;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.NGSPlatform;
 
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -48,6 +50,11 @@ public class GATKSAMRecord extends BAMRecord {
     public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP";  // reads that are clipped may use this attribute to keep track of their original alignment start
     public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE";    // reads that are clipped may use this attribute to keep track of their original alignment end
 
+    // Base Quality Score Recalibrator specific attribute tags
+    public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
+    public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
+    public static final String BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG = "BR";
+
     // the SAMRecord data we're caching
     private String mReadString = null;
     private GATKSAMReadGroupRecord mReadGroup = null;
@@ -155,6 +162,60 @@ public class GATKSAMRecord extends BAMRecord {
         return super.equals(o);
     }
 
+    /*
+    @Override
+    public byte[] getBaseQualities() {
+        if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
+            return super.getBaseQualities();
+        } else {
+            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
+            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
+                final byte[] quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, super.getBaseQualities() );
+                setBaseQualities(quals);
+                setAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG, true );
+                return quals;
+            } else { // just use the qualities that are in the read since we don't have the sufficient information to recalibrate on the fly
+                return super.getBaseQualities();
+            }
+        }
+    }
+    */
+
+    /**
+     * Accessors for base insertion and base deletion quality scores
+     */
+    public byte[] getBaseInsertionQualities() {
+        byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
+        if( quals == null ) {
+            quals = new byte[getBaseQualities().length];
+            Arrays.fill(quals, (byte) 45); // allow for differing default values between BaseInsertions and BaseDeletions
+            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
+            // else give default values which are flat Q45
+            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
+                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
+            }
+            // add the qual array to the read so that we don't have to do the recalibration work again
+            setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
+        }
+        return quals;
+    }
+
+    public byte[] getBaseDeletionQualities() {
+        byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
+        if( quals == null ) {
+            quals = new byte[getBaseQualities().length];
+            Arrays.fill(quals, (byte) 45);
+            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
+            // else give default values which are flat Q45
+            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
+                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
+            }
+            // add the qual array to the read so that we don't have to do the recalibration work again
+            setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
+        }
+        return quals;
+    }
+
     /**
      * Efficient caching accessor that returns the GATK NGSPlatform of this read
      * @return

From cef550903ee0af5f151ec96f7f56693774f5e85a Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 6 Feb 2012 00:48:00 -0500
Subject: [PATCH 09/67] Minor optimization

---
 .../gatk/walkers/annotator/TransmissionDisequilibriumTest.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
index 34f4bd607..d84ba44bc 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
@@ -73,10 +73,11 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
         // for each pair of alleles, add the likelihoods
         int numAlleles = vc.getNAlleles();
         for ( int allele1 = 0; allele1 < numAlleles; allele1++ ) {
+            final int HOM1index = determineHomIndex(allele1, numAlleles);
+
             for ( int allele2 = allele1 + 1; allele2 < numAlleles; allele2++ ) {
 
                 // TODO -- cache these for better performance
-                final int HOM1index = determineHomIndex(allele1, numAlleles);
                 final int HETindex = HOM1index + (allele2 - allele1);
                 final int HOM2index = determineHomIndex(allele2, numAlleles);
 

From b7ffd144e852ef0ebf479d1e7388cde76c37d33a Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 6 Feb 2012 08:54:42 -0500
Subject: [PATCH 10/67] Cleaning up the covariate classes and removing unused
 code from the bqsr optimizations in 2009.

---
 .../sting/gatk/GenomeAnalysisEngine.java      |  4 +-
 .../arguments/GATKArgumentCollection.java     |  2 +-
 .../recalibration/ContextCovariate.java       |  4 +-
 .../walkers/recalibration/CycleCovariate.java |  3 ++
 .../walkers/recalibration/DinucCovariate.java | 39 ++-----------------
 .../recalibration/GCContentCovariate.java     | 10 ++---
 .../recalibration/HomopolymerCovariate.java   |  6 ++-
 .../MappingQualityCovariate.java              |  6 ++-
 .../recalibration/MinimumNQSCovariate.java    | 15 ++++---
 .../recalibration/PositionCovariate.java      | 14 ++++---
 .../recalibration/PrimerRoundCovariate.java   | 15 ++++---
 .../recalibration/QualityScoreCovariate.java  | 11 ++----
 .../recalibration/ReadGroupCovariate.java     | 11 ++----
 .../sting/utils/sam/GATKSAMRecord.java        |  6 ++-
 14 files changed, 61 insertions(+), 85 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 97d1de1fa..c0db75aa9 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -215,8 +215,8 @@ public class GenomeAnalysisEngine {
             resetRandomGenerator(System.currentTimeMillis());
 
         // if the use specified an input BQSR recalibration table then enable on the fly recalibration
-        if (this.getArguments().RECAL_FILE != null)
-            setBaseRecalibration(this.getArguments().RECAL_FILE);
+        if (this.getArguments().BQSR_RECAL_FILE != null)
+            setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE);
 
         // Determine how the threads should be divided between CPU vs. IO.
         determineThreadAllocation();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 206fa5765..b3a1e2488 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -192,7 +192,7 @@ public class GATKArgumentCollection {
      * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
      */
     @Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration")
-    public File RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
+    public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
 
     @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
     public byte defaultBaseQualities = -1;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
index 837062dd2..8b8f2cee9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
@@ -41,12 +41,14 @@ public class ContextCovariate implements Covariate {
     String allN = "";
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) {
             allN += "N";
         }
     }
 
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         byte[] bases = read.getReadBases();
         for(int i = 0; i < read.getReadLength(); i++) {
@@ -55,8 +57,8 @@ public class ContextCovariate implements Covariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return str;
     }
-
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
index b0819ee69..e72b426d0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
@@ -51,6 +51,7 @@ public class CycleCovariate implements StandardCovariate {
     private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454,  NGSPlatform.ION_TORRENT);
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         if( RAC.DEFAULT_PLATFORM != null ) {
             if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
@@ -63,6 +64,7 @@ public class CycleCovariate implements StandardCovariate {
     }
 
     // Used to pick out the covariate's value from attributes of the read
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
 
         //-----------------------------
@@ -164,6 +166,7 @@ public class CycleCovariate implements StandardCovariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return Integer.parseInt( str );
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
index a7717161a..90768fe90 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
@@ -48,6 +48,7 @@ public class DinucCovariate implements StandardCovariate {
     private HashMap<Integer, Dinuc> dinucHashMap;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         final byte[] BASES = { (byte)'A', (byte)'C', (byte)'G', (byte)'T' };
         dinucHashMap = new HashMap<Integer, Dinuc>();
@@ -60,44 +61,10 @@ public class DinucCovariate implements StandardCovariate {
         dinucHashMap.put( Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC );
     }
 
-    /*
-    // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
-
-        byte base;
-        byte prevBase;
-        final byte[] bases = read.getReadBases();
-        // If this is a negative strand read then we need to reverse the direction for our previous base
-        if( read.getReadNegativeStrandFlag() ) {
-            // No dinuc at the beginning of the read
-            if( offset == bases.length-1 ) {
-                return NO_DINUC;
-            }
-            base = (byte)BaseUtils.simpleComplement( (char)(bases[offset]) );
-            // Note: We are using the previous base in the read, not the previous base in the reference. This is done in part to be consistent with unmapped reads.
-            prevBase = (byte)BaseUtils.simpleComplement( (char)(bases[offset + 1]) );
-        } else {
-            // No dinuc at the beginning of the read
-            if( offset == 0 ) {
-                return NO_DINUC;
-            }
-            base = bases[offset];
-            // Note: We are using the previous base in the read, not the previous base in the reference. This is done in part to be consistent with unmapped reads.
-            prevBase = bases[offset - 1];
-        }
-
-        // Make sure the previous base is good
-        if( !BaseUtils.isRegularBase( prevBase ) ) {
-            return NO_DINUC;
-        }
-
-        return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) );
-    }
-    */
-
     /**
      * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
      */
+    @Override
     public void getValues( SAMRecord read, Comparable[] result ) {
         final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
         final int readLength = read.getReadLength();
@@ -134,6 +101,7 @@ public class DinucCovariate implements StandardCovariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         byte[] bytes = str.getBytes();
         final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) );
@@ -143,7 +111,6 @@ public class DinucCovariate implements StandardCovariate {
         return returnDinuc;
     }
 
-
     /**
      * Reverses the given array in place.
      *
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
index be4e4ebfc..1a085d5c0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
@@ -41,12 +41,13 @@ public class GCContentCovariate implements ExperimentalCovariate {
     int numBack = 7;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         numBack = RAC.HOMOPOLYMER_NBACK;
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
 
         // ATTGCCCCGTAAAAAAAGAGAA
         // 0000123456654321001122
@@ -75,7 +76,8 @@ public class GCContentCovariate implements ExperimentalCovariate {
             return -1;
         }
     }
-    
+
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
@@ -83,10 +85,8 @@ public class GCContentCovariate implements ExperimentalCovariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return Integer.parseInt( str );
     }
-
-
-
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
index f9a75de6f..a54f9597b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
@@ -43,12 +43,13 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
     int numBack = 7;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         numBack = RAC.HOMOPOLYMER_NBACK;
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
 
         // This block of code is for if you don't want to only count consecutive bases
         // ATTGCCCCGTAAAAAAAAATA
@@ -90,6 +91,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
         return numAgree;
     }
 
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
@@ -97,8 +99,8 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return Integer.parseInt( str );
     }
-
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
index f9149a528..ad6484428 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
@@ -38,23 +38,25 @@ import net.sf.samtools.SAMRecord;
 public class MappingQualityCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
         return read.getMappingQuality();
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return Integer.parseInt( str );
     }
 
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
-
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
index 64cae2b62..0c1c66a5f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
@@ -41,12 +41,13 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
     private int windowReach; // How far in each direction from the current base to look
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
         windowReach = RAC.WINDOW_SIZE / 2; // integer division
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
 
         // Loop over the list of base quality scores in the window and find the minimum
         final byte[] quals = read.getBaseQualities();
@@ -61,14 +62,16 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
         return minQual;
     }
 
-    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
-    }
-
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
+
+    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
+    public final Comparable getValue( final String str ) {
+        return Integer.parseInt( str );
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
index 2495df57a..2a4497b0d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
@@ -39,11 +39,12 @@ import net.sf.samtools.SAMRecord;
 public class PositionCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
         int cycle = offset;
         if( read.getReadNegativeStrandFlag() ) {
             cycle = read.getReadLength() - (offset + 1);
@@ -51,15 +52,16 @@ public class PositionCovariate implements ExperimentalCovariate {
         return cycle;
     }
 
-    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
-    }
-
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
+    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
+    public final Comparable getValue( final String str ) {
+        return Integer.parseInt( str );
+    }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
index 23fdeebe3..4a9629234 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
@@ -40,11 +40,12 @@ import net.sf.samtools.SAMRecord;
 public class PrimerRoundCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
+    private final Comparable getValue( final SAMRecord read, final int offset ) {
         if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" ) ) {
             int pos = offset;
             if( read.getReadNegativeStrandFlag() ) {
@@ -57,14 +58,16 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
 
     }
 
-    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
-    }
-
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
+
+    // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
+    public final Comparable getValue( final String str ) {
+        return Integer.parseInt( str );
+    }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
index df0101e18..de6d5065b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
@@ -38,16 +38,11 @@ import net.sf.samtools.SAMRecord;
 public class QualityScoreCovariate implements RequiredCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
     }
 
-    /*
-    // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
-        return (int)(read.getBaseQualities()[offset]);
-    }
-    */
-
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         byte[] baseQualities = read.getBaseQualities();
         for(int i = 0; i < read.getReadLength(); i++) {
@@ -56,8 +51,8 @@ public class QualityScoreCovariate implements RequiredCovariate {
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return Integer.parseInt( str );
     }
-
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
index 0c853c349..cb108feb8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
@@ -40,16 +40,11 @@ public class ReadGroupCovariate implements RequiredCovariate{
     public static final String defaultReadGroup = "DefaultReadGroup";
 
     // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
     public void initialize( final RecalibrationArgumentCollection RAC ) {
     }
 
-    /*
-    // Used to pick out the covariate's value from attributes of the read
-    public final Comparable getValue( final SAMRecord read, final int offset ) {
-        return read.getReadGroup().getReadGroupId();
-    }
-    */
-
+    @Override
     public void getValues(SAMRecord read, Comparable[] comparable) {
         final String readGroupId = read.getReadGroup().getReadGroupId();
         for(int i = 0; i < read.getReadLength(); i++) {
@@ -58,10 +53,10 @@ public class ReadGroupCovariate implements RequiredCovariate{
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+    @Override
     public final Comparable getValue( final String str ) {
         return str;
     }
-
 }
 
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index e9b46ac24..66e957a41 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -162,9 +162,11 @@ public class GATKSAMRecord extends BAMRecord {
         return super.equals(o);
     }
 
-    /*
+
     @Override
     public byte[] getBaseQualities() {
+        return super.getBaseQualities();
+        /*
         if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
             return super.getBaseQualities();
         } else {
@@ -178,8 +180,8 @@ public class GATKSAMRecord extends BAMRecord {
                 return super.getBaseQualities();
             }
         }
+        */
     }
-    */
 
     /**
      * Accessors for base insertion and base deletion quality scores

From 9d94f310f15d0a99dca19d83e1de6c6ff195bda2 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 6 Feb 2012 09:01:19 -0500
Subject: [PATCH 11/67] Break AF histogram into max and min AFs

---
 .../evaluators/MultiallelicSummary.java       | 43 ++++++++++++-------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
index 9113e7538..82b6656af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
@@ -90,8 +90,11 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
     @DataPoint(description = "Multi-allelic Indel Novelty Rate")
     public String indelNoveltyRate = "NA";
 
-    @DataPoint(description="Histogram of allele frequencies")
-    AFHistogram AFhistogram = new AFHistogram();
+    @DataPoint(description="Histogram of allele frequencies for most common alternate allele")
+    AFHistogram AFhistogramMax = new AFHistogram();
+
+    @DataPoint(description="Histogram of allele frequencies for less common alternate alleles")
+    AFHistogram AFhistogramMin = new AFHistogram();
 
     /*
      * AF histogram table object
@@ -130,18 +133,10 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
 
         public String getName() { return "AFHistTable"; }
 
-        public void update(VariantContext vc) {
-            final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
-            if ( obj == null || !(obj instanceof List) )
-                return;
-
-            List<String> list = (List<String>)obj;
-            for ( String str : list ) {
-                final double AF = Double.valueOf(str);
-                final int bin = (int)(numBins * MathUtils.round(AF, 2));
-                AFhistogram[bin]++;
-            }
-        }
+        public void update(final double AF) {
+            final int bin = (int)(numBins * MathUtils.round(AF, 2));
+            AFhistogram[bin]++;
+       }
     }
 
     public void initialize(VariantEvalWalker walker) {}
@@ -180,7 +175,7 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
             default:
                 throw new UserException.BadInput("Unexpected variant context type: " + eval);
         }
-        AFhistogram.update(eval);
+        updateAFhistogram(eval);
         
         return null; // we don't capture any interesting sites
     }
@@ -213,6 +208,24 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
     private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
     }
 
+    private void updateAFhistogram(VariantContext vc) {
+
+        final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
+        if ( obj == null || !(obj instanceof List) )
+            return;
+
+        List<String> list = (List<String>)obj;
+        ArrayList<Double> AFs = new ArrayList<Double>(list.size());
+        for ( String str : list ) {
+            AFs.add(Double.valueOf(str));
+        }
+
+        Collections.sort(AFs);
+        AFhistogramMax.update(AFs.get(AFs.size()-1));
+        for ( int i = 0; i < AFs.size() - 1; i++ )
+            AFhistogramMin.update(AFs.get(i));
+    }
+    
     private final String noveltyRate(final int all, final int known) {
         final int novel = all - known;
         final double rate = (novel / (1.0 * all));

From 090d87b48b9113feb5ce8a4e081440ea332e9e6e Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Mon, 6 Feb 2012 10:33:12 -0500
Subject: [PATCH 12/67] Bug fix in ValidationSiteSelector: when input vcf had
 genotypes and was multiallelic, the parsing of the AF/AC fields was wrong.
 Better logic to unify parsing of field

---
 .../KeepAFSpectrumFrequencySelector.java      | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
index 15274d21c..4b68eed2e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java
@@ -71,25 +71,24 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
             // recompute AF,AC,AN based on genotypes:
             // todo - - maybe too inefficient??
             VariantContextUtils.calculateChromosomeCounts(vc, attributes, false);
-            afArray =  new double[] {Double.valueOf((String)attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY))};
-        }   else {
-            // sites-only vc or we explicitly tell to ignore genotypes; we trust the AF field if present
-            if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )  {
-                String afo = vc.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
+        }
 
-                if (afo.contains(",")) {
-                    String[] afs = afo.split(",");
-                    afs[0] = afs[0].substring(1,afs[0].length());
-                    afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
+        // sites-only vc or we explicitly tell to ignore genotypes; we trust the AF field if present
+        if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )  {
+            String afo = vc.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
 
-                    afArray = new double[afs.length];
+            if (afo.contains(",")) {
+                String[] afs = afo.split(",");
+                afs[0] = afs[0].substring(1,afs[0].length());
+                afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
 
-                    for (int k=0; k < afArray.length; k++)
-                        afArray[k] = Double.valueOf(afs[k]);
-                }
-                else
-                    afArray = new double[] {Double.valueOf(afo)};
+                afArray = new double[afs.length];
+
+                for (int k=0; k < afArray.length; k++)
+                    afArray[k] = Double.valueOf(afs[k]);
             }
+            else
+                afArray = new double[] {Double.valueOf(afo)};
         }
 
 

From dc05b71e39b1e0124a5954a9c561d4556269117d Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 6 Feb 2012 11:10:24 -0500
Subject: [PATCH 13/67] Updating Covariate interface with Mauricio to include
 an errorModel parameter. On the fly recalibration of base insertion and base
 deletion quals is live for the HaplotypeCaller

---
 .../walkers/recalibration/ContextCovariate.java  |  5 +++--
 .../recalibration/CountCovariatesWalker.java     |  3 ++-
 .../gatk/walkers/recalibration/Covariate.java    | 10 ++++++----
 .../walkers/recalibration/CycleCovariate.java    |  3 ++-
 .../walkers/recalibration/DinucCovariate.java    | 11 ++++++-----
 .../recalibration/GCContentCovariate.java        |  3 ++-
 .../recalibration/HomopolymerCovariate.java      |  3 ++-
 .../recalibration/MappingQualityCovariate.java   |  3 ++-
 .../recalibration/MinimumNQSCovariate.java       |  3 ++-
 .../walkers/recalibration/PositionCovariate.java |  3 ++-
 .../recalibration/PrimerRoundCovariate.java      |  3 ++-
 .../recalibration/QualityScoreCovariate.java     | 16 ++++++++++++----
 .../recalibration/ReadGroupCovariate.java        |  5 +++--
 .../walkers/recalibration/RecalDataManager.java  |  5 +++--
 .../recalibration/TableRecalibrationWalker.java  |  3 ++-
 .../utils/recalibration/BaseRecalibration.java   |  4 ++--
 .../sting/utils/sam/GATKSAMRecord.java           |  5 +++--
 17 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
index 8b8f2cee9..0edd5d03b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
@@ -26,6 +26,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 import java.util.Arrays;
 
@@ -35,7 +36,7 @@ import java.util.Arrays;
  * Date: 9/26/11
  */
 
-public class ContextCovariate implements Covariate {
+public class ContextCovariate implements ExperimentalCovariate {
 
     final int CONTEXT_SIZE = 8;
     String allN = "";
@@ -49,7 +50,7 @@ public class ContextCovariate implements Covariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         byte[] bases = read.getReadBases();
         for(int i = 0; i < read.getReadLength(); i++) {
             comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index f6f05d39c..4e3d4048b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.io.PrintStream;
@@ -374,7 +375,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
                     RecalDataManager.parseColorSpace( gatkRead );
                     gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
-                            RecalDataManager.computeCovariates( gatkRead, requestedCovariates ));
+                            RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
                 }
 
                 // Skip this position if base quality is zero
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
index 46ce006ee..2e32dbb8c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -32,7 +33,7 @@ import net.sf.samtools.SAMRecord;
  * User: rpoplin
  * Date: Oct 30, 2009
  *
- * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read, offset, and corresponding reference bases
+ * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read.
  * In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code.
  * This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up.
  */
@@ -40,9 +41,10 @@ import net.sf.samtools.SAMRecord;
 public interface Covariate {
     public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
     public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
-    public void getValues( SAMRecord read, Comparable[] comparable ); //Takes an array of size (at least) read.getReadLength() and fills it with covariate
-        //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
-        //read-specific calculations to be done just once rather than for each offset.
+    public void getValues( SAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType );
+            //Takes an array of size (at least) read.getReadLength() and fills it with covariate
+            //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
+            //read-specific calculations to be done just once rather than for each offset.
 }
 
 interface RequiredCovariate extends Covariate {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
index e72b426d0..00490d898 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
@@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.EnumSet;
@@ -65,7 +66,7 @@ public class CycleCovariate implements StandardCovariate {
 
     // Used to pick out the covariate's value from attributes of the read
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
 
         //-----------------------------
         // Illumina, Solid, PacBio, and Complete Genomics
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
index 90768fe90..e60b1f795 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 import java.util.HashMap;
 
@@ -65,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
      * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
      */
     @Override
-    public void getValues( SAMRecord read, Comparable[] result ) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
         final int readLength = read.getReadLength();
         final boolean negativeStrand = read.getReadNegativeStrandFlag();
@@ -78,7 +79,7 @@ public class DinucCovariate implements StandardCovariate {
         if(negativeStrand) {
             bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
         }
-        result[0] = NO_DINUC; // No dinuc at the beginning of the read
+        comparable[0] = NO_DINUC; // No dinuc at the beginning of the read
 
         prevBase = bases[0];
         offset++;
@@ -87,16 +88,16 @@ public class DinucCovariate implements StandardCovariate {
              // previous base in the reference. This is done in part to be consistent with unmapped reads.
              base = bases[offset];
              if( BaseUtils.isRegularBase( prevBase ) ) {
-                 result[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
+                 comparable[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
              } else {
-                 result[offset] = NO_DINUC;
+                 comparable[offset] = NO_DINUC;
              }
 
              offset++;
              prevBase = base;
         }
         if(negativeStrand) {
-            reverse( result );
+            reverse( comparable );
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
index 1a085d5c0..e4ff415fe 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2010 The Broad Institute
@@ -78,7 +79,7 @@ public class GCContentCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
index a54f9597b..24cb98a8d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -92,7 +93,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
index ad6484428..ec5b357a4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -54,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
index 0c1c66a5f..21fd14e0c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -63,7 +64,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
index 2a4497b0d..5c410ce5f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -53,7 +54,7 @@ public class PositionCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
index 4a9629234..e6aa44226 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -59,7 +60,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         for(int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
index de6d5065b..f85b52350 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
@@ -1,6 +1,9 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+
+import java.util.Arrays;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -43,10 +46,15 @@ public class QualityScoreCovariate implements RequiredCovariate {
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
-        byte[] baseQualities = read.getBaseQualities();
-        for(int i = 0; i < read.getReadLength(); i++) {
-            comparable[i] = (int) baseQualities[i];
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
+        if( modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ) {
+            byte[] baseQualities = read.getBaseQualities();
+            for(int i = 0; i < read.getReadLength(); i++) {
+                comparable[i] = (int) baseQualities[i];
+            }
+        } else { // model == BASE_INSERTION || model == BASE_DELETION
+            Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
+                                         // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
index cb108feb8..e27077128 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -35,7 +36,7 @@ import net.sf.samtools.SAMRecord;
  * The Read Group covariate.
  */
 
-public class ReadGroupCovariate implements RequiredCovariate{
+public class ReadGroupCovariate implements RequiredCovariate {
 
     public static final String defaultReadGroup = "DefaultReadGroup";
 
@@ -45,7 +46,7 @@ public class ReadGroupCovariate implements RequiredCovariate{
     }
 
     @Override
-    public void getValues(SAMRecord read, Comparable[] comparable) {
+    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
         final String readGroupId = read.getReadGroup().getReadGroupId();
         for(int i = 0; i < read.getReadLength(); i++) {
             comparable[i] = readGroupId;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 66ad1fb9c..be02063de 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.collections.NestedHashMap;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -571,7 +572,7 @@ public class RecalDataManager {
      * value for the ith position in the read and the jth covariate in
      * reqeustedCovariates list.
      */
-     public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
+     public static Comparable[][] computeCovariates( final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType ) {
          //compute all covariates for this read
          final List<Covariate> requestedCovariatesRef = requestedCovariates;
          final int numRequestedCovariates = requestedCovariatesRef.size();
@@ -582,7 +583,7 @@ public class RecalDataManager {
 
          // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
          for( int i = 0; i < numRequestedCovariates; i++ ) {
-             requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder );
+             requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder, modelType );
              for(int j = 0; j < readLength; j++) {
                  //copy values into a 2D array that allows all covar types to be extracted at once for
                  //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index a569aefd2..a8006d506 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
 import org.broadinstitute.sting.utils.collections.NestedHashMap;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.text.TextFormattingUtils;
 import org.broadinstitute.sting.utils.text.XReadLines;
@@ -398,7 +399,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
         //compute all covariate values for this read
         final Comparable[][] covariateValues_offset_x_covar =
-            RecalDataManager.computeCovariates(read, requestedCovariates);
+            RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
 
         // For each base in the read
         for( int offset = 0; offset < read.getReadLength(); offset++ ) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index 2e785043d..ce52f09a2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -177,13 +177,13 @@ public class BaseRecalibration {
         dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
     }
     
-    public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals ) {
+    public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) {
 
         final byte[] recalQuals = originalQuals.clone();
         
         //compute all covariate values for this read
         final Comparable[][] covariateValues_offset_x_covar =
-                RecalDataManager.computeCovariates(read, requestedCovariates);
+                RecalDataManager.computeCovariates(read, requestedCovariates, modelType);
     
         // For each base in the read
         for( int offset = 0; offset < read.getReadLength(); offset++ ) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 66e957a41..bdcf2b210 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.sam;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.NGSPlatform;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 import java.util.Arrays;
 import java.util.HashMap;
@@ -194,7 +195,7 @@ public class GATKSAMRecord extends BAMRecord {
             // if the recal data was populated in the engine then recalibrate the quality scores on the fly
             // else give default values which are flat Q45
             if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
-                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
+                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
             }
             // add the qual array to the read so that we don't have to do the recalibration work again
             setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
@@ -210,7 +211,7 @@ public class GATKSAMRecord extends BAMRecord {
             // if the recal data was populated in the engine then recalibrate the quality scores on the fly
             // else give default values which are flat Q45
             if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
-                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
+                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
             }
             // add the qual array to the read so that we don't have to do the recalibration work again
             setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );

From edb4edc08fb0dea2aeea61afcfe4fd39faa7ada1 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 6 Feb 2012 11:53:15 -0500
Subject: [PATCH 14/67] Commented out unused metrics for now

---
 .../varianteval/evaluators/MultiallelicSummary.java        | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
index 82b6656af..eef73c190 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
@@ -83,11 +83,12 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
     @DataPoint(description = "Multi-allelic SNP Novelty Rate")
     public String SNPNoveltyRate = "NA";
 
-    @DataPoint(description = "Multi-allelic Indels partially known")
+    //TODO -- implement me
+    //@DataPoint(description = "Multi-allelic Indels partially known")
     public int knownIndelsPartial = 0;
-    @DataPoint(description = "Multi-allelic Indels completely known")
+    //@DataPoint(description = "Multi-allelic Indels completely known")
     public int knownIndelsComplete = 0;
-    @DataPoint(description = "Multi-allelic Indel Novelty Rate")
+    //@DataPoint(description = "Multi-allelic Indel Novelty Rate")
     public String indelNoveltyRate = "NA";
 
     @DataPoint(description="Histogram of allele frequencies for most common alternate allele")

From 827be878b4b444f047d1fab5819d3b5b54a7c3ce Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Mon, 6 Feb 2012 11:58:13 -0500
Subject: [PATCH 15/67] Bug fix when running UG in GenotypeGivenAlleles mode:
 if an input site to genotype had no coverage, the output VCF had AC,AF and AN
 inherited from input VCF, which could have nothing to do with given BAM so
 numbers could be non-sensical. Now new vc has clear attributes instead of
 attributes inherited from input VCF.

---
 .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index aa33d39e3..5cdef6cf5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -252,7 +252,7 @@ public class UnifiedGenotyperEngine {
             VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
             if ( vcInput == null )
                 return null;
-            vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).make();
+            vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).attributes(new HashMap<String, Object>()).make();
         } else {
             // deal with bad/non-standard reference bases
             if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )

From 6ec686b8779770a99b9aa8cd3aa27bcc6de0c8d9 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Mon, 6 Feb 2012 13:19:26 -0500
Subject: [PATCH 16/67] Complement to previous commit: make sure we also don't
 inherit filter from input VCF when genotyping at an empty site

---
 .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 5cdef6cf5..aae781628 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -252,7 +252,7 @@ public class UnifiedGenotyperEngine {
             VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
             if ( vcInput == null )
                 return null;
-            vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).attributes(new HashMap<String, Object>()).make();
+            vc = new VariantContextBuilder(vcInput).source("UG_call").noID().referenceBaseForIndel(ref.getBase()).attributes(new HashMap<String, Object>()).filters(new HashSet<String>()).make();
         } else {
             // deal with bad/non-standard reference bases
             if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )

From 91897f5fe78e452916e2e786890773bea536e970 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 6 Feb 2012 16:23:32 -0500
Subject: [PATCH 17/67] Transpose rows/cols in AF table to make it molten (so I
 can plot easily in R)

---
 .../evaluators/MultiallelicSummary.java       | 35 +++++++++++--------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
index eef73c190..97aebc376 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java
@@ -91,25 +91,31 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
     //@DataPoint(description = "Multi-allelic Indel Novelty Rate")
     public String indelNoveltyRate = "NA";
 
-    @DataPoint(description="Histogram of allele frequencies for most common alternate allele")
-    AFHistogram AFhistogramMax = new AFHistogram();
+    @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele")
+    AFHistogram AFhistogramMaxSnp = new AFHistogram();
 
-    @DataPoint(description="Histogram of allele frequencies for less common alternate alleles")
-    AFHistogram AFhistogramMin = new AFHistogram();
+    @DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles")
+    AFHistogram AFhistogramMinSnp = new AFHistogram();
+
+    @DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele")
+    AFHistogram AFhistogramMaxIndel = new AFHistogram();
+
+    @DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles")
+    AFHistogram AFhistogramMinIndel = new AFHistogram();
 
     /*
      * AF histogram table object
      */
     static class AFHistogram implements TableType {
-        private Object[] colKeys, rowKeys = {"pairwise_AF"};
+        private Object[] rowKeys, colKeys = {"count"};
         private int[] AFhistogram;
 
         private static final double AFincrement = 0.01;
         private static final int numBins = (int)(1.00 / AFincrement);
 
         public AFHistogram() {
-            colKeys = initColKeys();
-            AFhistogram = new int[colKeys.length];
+            rowKeys = initRowKeys();
+            AFhistogram = new int[rowKeys.length];
         }
 
         public Object[] getColumnKeys() {
@@ -121,10 +127,10 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
         }
 
         public Object getCell(int row, int col) {
-            return AFhistogram[col];
+            return AFhistogram[row];
         }
 
-        private static Object[] initColKeys() {
+        private static Object[] initRowKeys() {
             ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
             for ( double a = 0.00; a <= 1.01; a += AFincrement ) {
                 keyList.add(String.format("%.2f", a));
@@ -164,6 +170,7 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
                     nMultiSNPs++;
                     calculatePairwiseTiTv(eval);
                     calculateSNPPairwiseNovelty(eval, comp);
+                    updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp);
                 }
                 break;
             case INDEL:
@@ -171,13 +178,13 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
                 if ( !eval.isBiallelic() ) {
                     nMultiIndels++;
                     calculateIndelPairwiseNovelty(eval, comp);
+                    updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel);
                 }
                 break;
             default:
                 throw new UserException.BadInput("Unexpected variant context type: " + eval);
         }
-        updateAFhistogram(eval);
-        
+
         return null; // we don't capture any interesting sites
     }
 
@@ -209,7 +216,7 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
     private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
     }
 
-    private void updateAFhistogram(VariantContext vc) {
+    private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) {
 
         final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
         if ( obj == null || !(obj instanceof List) )
@@ -222,9 +229,9 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
         }
 
         Collections.sort(AFs);
-        AFhistogramMax.update(AFs.get(AFs.size()-1));
+        max.update(AFs.get(AFs.size()-1));
         for ( int i = 0; i < AFs.size() - 1; i++ )
-            AFhistogramMin.update(AFs.get(i));
+            min.update(AFs.get(i));
     }
     
     private final String noveltyRate(final int all, final int known) {

From 6e6f0f10e1073ed4adf6aa05631f3a65c08503ee Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Mon, 6 Feb 2012 12:31:20 -0500
Subject: [PATCH 18/67] BaseQualityScoreRecalibration walker (bqsr v2) first
 commit includes

   * Adding the context covariate standard in both modes (including old CountCovariates) with parameters
   * Updating all covariates and modules to use GATKSAMRecord throughout the code.
   * BQSR now processes indels in the pileup (but doesn't do anything with them yet)
---
 .../recalibration/ContextCovariate.java       |  27 +-
 .../gatk/walkers/recalibration/Covariate.java |  25 +-
 .../walkers/recalibration/CycleCovariate.java | 134 +++--
 .../walkers/recalibration/DinucCovariate.java |  57 +-
 .../recalibration/GCContentCovariate.java     |  33 +-
 .../recalibration/HomopolymerCovariate.java   |  24 +-
 .../MappingQualityCovariate.java              |  16 +-
 .../recalibration/MinimumNQSCovariate.java    |  17 +-
 .../recalibration/PositionCovariate.java      |  15 +-
 .../recalibration/PrimerRoundCovariate.java   |  24 +-
 .../recalibration/QualityScoreCovariate.java  |  19 +-
 .../recalibration/ReadGroupCovariate.java     |  12 +-
 .../recalibration/RecalDataManager.java       | 537 ++++++++++--------
 .../RecalibrationArgumentCollection.java      |  25 +-
 14 files changed, 547 insertions(+), 418 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
index 0edd5d03b..875782fdc 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
@@ -25,8 +25,9 @@
 
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.Arrays;
 
@@ -38,28 +39,32 @@ import java.util.Arrays;
 
 public class ContextCovariate implements ExperimentalCovariate {
 
-    final int CONTEXT_SIZE = 8;
-    String allN = "";
+    private int CONTEXT_SIZE;
+    private String allN = "";
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
-        for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        CONTEXT_SIZE = RAC.CONTEXT_SIZE;
+
+        if (CONTEXT_SIZE <= 0)
+            throw new UserException("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead");
+
+        // initialize allN given the size of the context
+        for (int i = 0; i < CONTEXT_SIZE; i++)
             allN += "N";
-        }
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
         byte[] bases = read.getReadBases();
-        for(int i = 0; i < read.getReadLength(); i++) {
-            comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) );
-        }
+        for (int i = 0; i < read.getReadLength(); i++)
+            comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
+    public final Comparable getValue(final String str) {
         return str;
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
index 2e32dbb8c..e4edb8ca6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -39,19 +39,18 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
  */
 
 public interface Covariate {
-    public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
-    public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
-    public void getValues( SAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType );
-            //Takes an array of size (at least) read.getReadLength() and fills it with covariate
-            //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
-            //read-specific calculations to be done just once rather than for each offset.
+    public void initialize(RecalibrationArgumentCollection RAC); // Initialize any member variables using the command-line arguments passed to the walkers
+
+    public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
+
+    public void getValues(GATKSAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType);
+    //Takes an array of size (at least) read.getReadLength() and fills it with covariate
+    //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
+    //read-specific calculations to be done just once rather than for each offset.
 }
 
-interface RequiredCovariate extends Covariate {
-}
+interface RequiredCovariate extends Covariate {}
 
-interface StandardCovariate extends Covariate {
-}
+interface StandardCovariate extends Covariate {}
 
-interface ExperimentalCovariate extends Covariate {
-}
+interface ExperimentalCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
index 00490d898..4244af7d1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
@@ -1,6 +1,5 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -40,69 +39,69 @@ import java.util.EnumSet;
  * Date: Oct 30, 2009
  *
  * The Cycle covariate.
- *  For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read)
- *  For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle
- *     For example, for the read: AAACCCCGAAATTTTTACTG
- *             the cycle would be 11111111222333333344
- *  For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round
+ * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read)
+ * For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle
+ * For example, for the read: AAACCCCGAAATTTTTACTG
+ * the cycle would be 11111111222333333344
+ * For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round
  */
 
 public class CycleCovariate implements StandardCovariate {
     private final static EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
-    private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454,  NGSPlatform.ION_TORRENT);
+    private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
-        if( RAC.DEFAULT_PLATFORM != null ) {
-            if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
-                RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        if (RAC.DEFAULT_PLATFORM != null) {
+            if (RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SLX") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ILLUMINA") ||
+                    RAC.DEFAULT_PLATFORM.contains("454") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SOLID") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ABI_SOLID")) {
                 // nothing to do
-            } else {
-                throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
+            }
+            else {
+                throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform. Implemented options are illumina, 454, and solid");
             }
         }
     }
 
     // Used to pick out the covariate's value from attributes of the read
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
 
         //-----------------------------
         // Illumina, Solid, PacBio, and Complete Genomics
         //-----------------------------
 
-        final NGSPlatform ngsPlatform = ((GATKSAMRecord)read).getNGSPlatform();
-        if( DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
+        final NGSPlatform ngsPlatform = read.getNGSPlatform();
+        if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
             final int init;
             final int increment;
-            if( !read.getReadNegativeStrandFlag() ) {
+            if (!read.getReadNegativeStrandFlag()) {
                 // Differentiate between first and second of pair.
                 // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
                 // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
                 // Therefore the cycle covariate must differentiate between first and second of pair reads.
                 // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
                 //   the current sequential model would consider the effects independently instead of jointly.
-                if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
+                if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
                     //second of pair, positive strand
                     init = -1;
                     increment = -1;
                 }
-                else
-                {
+                else {
                     //first of pair, positive strand
                     init = 1;
                     increment = 1;
                 }
 
-            } else {
-                if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
+            }
+            else {
+                if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
                     //second of pair, negative strand
                     init = -read.getReadLength();
                     increment = 1;
                 }
-                else
-                {
+                else {
                     //first of pair, negative strand
                     init = read.getReadLength();
                     increment = -1;
@@ -110,7 +109,7 @@ public class CycleCovariate implements StandardCovariate {
             }
 
             int cycle = init;
-            for(int i = 0; i < read.getReadLength(); i++) {
+            for (int i = 0; i < read.getReadLength(); i++) {
                 comparable[i] = cycle;
                 cycle += increment;
             }
@@ -119,7 +118,7 @@ public class CycleCovariate implements StandardCovariate {
         //-----------------------------
         // 454 and Ion Torrent
         //-----------------------------
-        else if( FLOW_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
+        else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) {
 
             final int readLength = read.getReadLength();
             final byte[] bases = read.getReadBases();
@@ -136,39 +135,78 @@ public class CycleCovariate implements StandardCovariate {
 
             // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
             // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
-            if( !read.getReadNegativeStrandFlag() ) { // Forward direction
+            if (!read.getReadNegativeStrandFlag()) { // Forward direction
                 int iii = 0;
-                while( iii < readLength )
-                {
-                    while( iii < readLength && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii++; }
-                    while( iii < readLength && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii++; }
-                    while( iii < readLength && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii++; }
-                    while( iii < readLength && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii++; }
-                    if( iii < readLength ) { if (multiplyByNegative1) cycle--; else cycle++; }
-                    if( iii < readLength && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii++; }
+                while (iii < readLength) {
+                    while (iii < readLength && bases[iii] == (byte) 'T') {
+                        comparable[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'A') {
+                        comparable[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'C') {
+                        comparable[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'G') {
+                        comparable[iii] = cycle;
+                        iii++;
+                    }
+                    if (iii < readLength) {
+                        if (multiplyByNegative1)
+                            cycle--;
+                        else
+                            cycle++;
+                    }
+                    if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
+                        comparable[iii] = cycle;
+                        iii++;
+                    }
 
                 }
-            } else { // Negative direction
-                int iii = readLength-1;
-                while( iii >= 0 )
-                {
-                    while( iii >= 0 && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii--; }
-                    while( iii >= 0 && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii--; }
-                    while( iii >= 0 && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii--; }
-                    while( iii >= 0 && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii--; }
-                    if( iii >= 0 ) { if (multiplyByNegative1) cycle--; else cycle++; }
-                    if( iii >= 0 && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii--; }
+            }
+            else { // Negative direction
+                int iii = readLength - 1;
+                while (iii >= 0) {
+                    while (iii >= 0 && bases[iii] == (byte) 'T') {
+                        comparable[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'A') {
+                        comparable[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'C') {
+                        comparable[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'G') {
+                        comparable[iii] = cycle;
+                        iii--;
+                    }
+                    if (iii >= 0) {
+                        if (multiplyByNegative1)
+                            cycle--;
+                        else
+                            cycle++;
+                    }
+                    if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
+                        comparable[iii] = cycle;
+                        iii--;
+                    }
                 }
             }
         }
-        else  {
+        else {
             throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
index e60b1f795..2fa1b33ca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
@@ -1,8 +1,8 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.HashMap;
 
@@ -43,30 +43,30 @@ import java.util.HashMap;
 
 public class DinucCovariate implements StandardCovariate {
 
-    private static final byte NO_CALL = (byte)'N';
+    private static final byte NO_CALL = (byte) 'N';
     private static final Dinuc NO_DINUC = new Dinuc(NO_CALL, NO_CALL);
 
     private HashMap<Integer, Dinuc> dinucHashMap;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
-        final byte[] BASES = { (byte)'A', (byte)'C', (byte)'G', (byte)'T' };
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        final byte[] BASES = {(byte) 'A', (byte) 'C', (byte) 'G', (byte) 'T'};
         dinucHashMap = new HashMap<Integer, Dinuc>();
-        for( byte byte1 : BASES ) {
-            for( byte byte2: BASES ) {
-                dinucHashMap.put( Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2) ); // This might seem silly, but Strings are too slow
+        for (byte byte1 : BASES) {
+            for (byte byte2 : BASES) {
+                dinucHashMap.put(Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2)); // This might seem silly, but Strings are too slow
             }
         }
         // Add the "no dinuc" entry too
-        dinucHashMap.put( Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC );
+        dinucHashMap.put(Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC);
     }
 
     /**
      * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
      */
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
         final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
         final int readLength = read.getReadLength();
         final boolean negativeStrand = read.getReadNegativeStrandFlag();
@@ -76,37 +76,38 @@ public class DinucCovariate implements StandardCovariate {
         int offset = 0;
         // If this is a negative strand read then we need to reverse the direction for our previous base
 
-        if(negativeStrand) {
+        if (negativeStrand) {
             bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
         }
         comparable[0] = NO_DINUC; // No dinuc at the beginning of the read
 
         prevBase = bases[0];
         offset++;
-        while(offset < readLength) {
-             // Note: We are using the previous base in the read, not the
-             // previous base in the reference. This is done in part to be consistent with unmapped reads.
-             base = bases[offset];
-             if( BaseUtils.isRegularBase( prevBase ) ) {
-                 comparable[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
-             } else {
-                 comparable[offset] = NO_DINUC;
-             }
+        while (offset < readLength) {
+            // Note: We are using the previous base in the read, not the
+            // previous base in the reference. This is done in part to be consistent with unmapped reads.
+            base = bases[offset];
+            if (BaseUtils.isRegularBase(prevBase)) {
+                comparable[offset] = dinucHashMapRef.get(Dinuc.hashBytes(prevBase, base));
+            }
+            else {
+                comparable[offset] = NO_DINUC;
+            }
 
-             offset++;
-             prevBase = base;
+            offset++;
+            prevBase = base;
         }
-        if(negativeStrand) {
-            reverse( comparable );
+        if (negativeStrand) {
+            reverse(comparable);
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
+    public final Comparable getValue(final String str) {
         byte[] bytes = str.getBytes();
-        final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) );
-        if( returnDinuc.compareTo(NO_DINUC) == 0 ) {
+        final Dinuc returnDinuc = dinucHashMap.get(Dinuc.hashBytes(bytes[0], bytes[1]));
+        if (returnDinuc.compareTo(NO_DINUC) == 0) {
             return null;
         }
         return returnDinuc;
@@ -115,11 +116,11 @@ public class DinucCovariate implements StandardCovariate {
     /**
      * Reverses the given array in place.
      *
-     * @param array
+     * @param array any array
      */
     private static void reverse(final Comparable[] array) {
         final int arrayLength = array.length;
-        for(int l = 0, r = arrayLength - 1; l < r; l++, r--) {
+        for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
             final Comparable temp = array[l];
             array[l] = array[r];
             array[r] = temp;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
index e4ff415fe..7b209ae5c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2010 The Broad Institute
@@ -39,55 +40,57 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 public class GCContentCovariate implements ExperimentalCovariate {
 
-    int numBack = 7;
+    private int numBack = 7;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
         numBack = RAC.HOMOPOLYMER_NBACK;
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
+    private Comparable getValue(final SAMRecord read, final int offset) {
 
         // ATTGCCCCGTAAAAAAAGAGAA
         // 0000123456654321001122
 
-        if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) ) {
+        if (read.getReadGroup().getPlatform().equalsIgnoreCase("ILLUMINA") || read.getReadGroup().getPlatform().equalsIgnoreCase("SLX")) {
             int numGC = 0;
-            int startPos = 0;
-            int stopPos = 0;
+            int startPos;
+            int stopPos;
             final byte[] bases = read.getReadBases();
-            if( !read.getReadNegativeStrandFlag() ) { // Forward direction
+            if (!read.getReadNegativeStrandFlag()) { // Forward direction
                 startPos = Math.max(offset - numBack, 0);
                 stopPos = Math.max(offset - 1, 0);
-            } else { // Negative direction
+            }
+            else { // Negative direction
                 startPos = Math.min(offset + 2, bases.length);
                 stopPos = Math.min(offset + numBack + 1, bases.length);
             }
 
-            for( int iii = startPos; iii < stopPos; iii++ ) {
-                if( bases[iii] == (byte)'G' || bases[iii] == (byte)'C' ) {
+            for (int iii = startPos; iii < stopPos; iii++) {
+                if (bases[iii] == (byte) 'G' || bases[iii] == (byte) 'C') {
                     numGC++;
                 }
             }
 
             return numGC;
-        } else { // This effect is specific to the Illumina platform
+        }
+        else { // This effect is specific to the Illumina platform
             return -1;
         }
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
index 24cb98a8d..fd67edc3b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -41,16 +42,16 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 public class HomopolymerCovariate implements ExperimentalCovariate {
 
-    int numBack = 7;
+    private int numBack;
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
         numBack = RAC.HOMOPOLYMER_NBACK;
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
+    private Comparable getValue(final SAMRecord read, final int offset) {
 
         // This block of code is for if you don't want to only count consecutive bases
         // ATTGCCCCGTAAAAAAAAATA
@@ -77,13 +78,14 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
         int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read
         final byte[] bases = read.getReadBases();
         int iii = offset;
-        if( !read.getReadNegativeStrandFlag() ) { // Forward direction
-            while( iii <= bases.length-2 && bases[iii] == bases[iii+1] && numAgree < numBack ) {
+        if (!read.getReadNegativeStrandFlag()) { // Forward direction
+            while (iii <= bases.length - 2 && bases[iii] == bases[iii + 1] && numAgree < numBack) {
                 numAgree++;
                 iii++;
             }
-        } else { // Negative direction
-            while( iii >= 1 && bases[iii] == bases[iii-1] && numAgree < numBack ) {
+        }
+        else { // Negative direction
+            while (iii >= 1 && bases[iii] == bases[iii - 1] && numAgree < numBack) {
                 numAgree++;
                 iii--;
             }
@@ -93,15 +95,15 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
index ec5b357a4..e22049890 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -40,24 +40,24 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
+    private Comparable getValue(final GATKSAMRecord read) {
         return read.getMappingQuality();
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
-            comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
+            comparable[iii] = getValue(read); // BUGBUG: this can be optimized
         }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
index 21fd14e0c..1dfb915b9 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -43,20 +44,20 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
         windowReach = RAC.WINDOW_SIZE / 2; // integer division
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
+    private Comparable getValue(final SAMRecord read, final int offset) {
 
         // Loop over the list of base quality scores in the window and find the minimum
         final byte[] quals = read.getBaseQualities();
         int minQual = quals[offset];
         final int minIndex = Math.max(offset - windowReach, 0);
         final int maxIndex = Math.min(offset + windowReach, quals.length - 1);
-        for ( int iii = minIndex; iii < maxIndex; iii++ ) {
-            if( quals[iii] < minQual ) {
+        for (int iii = minIndex; iii < maxIndex; iii++) {
+            if (quals[iii] < minQual) {
                 minQual = quals[iii];
             }
         }
@@ -64,15 +65,15 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
index 5c410ce5f..fbd1efc47 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -41,28 +42,28 @@ public class PositionCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
+    private Comparable getValue(final SAMRecord read, final int offset) {
         int cycle = offset;
-        if( read.getReadNegativeStrandFlag() ) {
+        if (read.getReadNegativeStrandFlag()) {
             cycle = read.getReadLength() - (offset + 1);
         }
         return cycle;
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
index e6aa44226..8dfa11884 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
 
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -34,41 +35,42 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
  * Date: Nov 13, 2009
  *
  * The Primer Round covariate.
- *  For Solexa and 454 this is the same value of the length of the read.
- *  For SOLiD this is different for each position according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
+ * For Solexa and 454 this is the same value of the length of the read.
+ * For SOLiD this is different for each position according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
  */
 
 public class PrimerRoundCovariate implements ExperimentalCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
     }
 
     // Used to pick out the covariate's value from attributes of the read
-    private final Comparable getValue( final SAMRecord read, final int offset ) {
-        if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" ) ) {
+    private Comparable getValue(final SAMRecord read, final int offset) {
+        if (read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || read.getReadGroup().getPlatform().equalsIgnoreCase("ABI_SOLID")) {
             int pos = offset;
-            if( read.getReadNegativeStrandFlag() ) {
+            if (read.getReadNegativeStrandFlag()) {
                 pos = read.getReadLength() - (offset + 1);
             }
             return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
-        } else {
+        }
+        else {
             return 1; // nothing to do here because it is always the same
         }
 
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        for(int iii = 0; iii < read.getReadLength(); iii++) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
index f85b52350..1ed4a6fe8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.Arrays;
 
@@ -42,25 +42,26 @@ public class QualityScoreCovariate implements RequiredCovariate {
 
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
-        if( modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+        if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
             byte[] baseQualities = read.getBaseQualities();
-            for(int i = 0; i < read.getReadLength(); i++) {
+            for (int i = 0; i < read.getReadLength(); i++) {
                 comparable[i] = (int) baseQualities[i];
             }
-        } else { // model == BASE_INSERTION || model == BASE_DELETION
+        }
+        else { // model == BASE_INSERTION || model == BASE_DELETION
             Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
-                                         // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
+            // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
-        return Integer.parseInt( str );
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
index e27077128..27e1d8263 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
  * Copyright (c) 2009 The Broad Institute
@@ -38,24 +38,22 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 public class ReadGroupCovariate implements RequiredCovariate {
 
-    public static final String defaultReadGroup = "DefaultReadGroup";
-
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
-    public void initialize( final RecalibrationArgumentCollection RAC ) {
+    public void initialize(final RecalibrationArgumentCollection RAC) {
     }
 
     @Override
-    public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
         final String readGroupId = read.getReadGroup().getReadGroupId();
-        for(int i = 0; i < read.getReadLength(); i++) {
+        for (int i = 0; i < read.getReadLength(); i++) {
             comparable[i] = readGroupId;
         }
     }
 
     // Used to get the covariate's value from input csv file in TableRecalibrationWalker
     @Override
-    public final Comparable getValue( final String str ) {
+    public final Comparable getValue(final String str) {
         return str;
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index be02063de..18b33c0e8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -25,7 +25,6 @@
 
 package org.broadinstitute.sting.gatk.walkers.recalibration;
 
-import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMUtils;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.BaseUtils;
@@ -67,22 +66,36 @@ public class RecalDataManager {
     private static boolean warnUserNullPlatform = false;
 
     public enum SOLID_RECAL_MODE {
-        /** Treat reference inserted bases as reference matching bases. Very unsafe! */
+        /**
+         * Treat reference inserted bases as reference matching bases. Very unsafe!
+         */
         DO_NOTHING,
-        /** Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option. */
+        /**
+         * Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option.
+         */
         SET_Q_ZERO,
-        /** In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV. */
+        /**
+         * In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV.
+         */
         SET_Q_ZERO_BASE_N,
-        /** Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference. */
+        /**
+         * Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference.
+         */
         REMOVE_REF_BIAS
     }
 
     public enum SOLID_NOCALL_STRATEGY {
-        /** When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option. */
+        /**
+         * When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option.
+         */
         THROW_EXCEPTION,
-        /** Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare. */
+        /**
+         * Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare.
+         */
         LEAVE_READ_UNRECALIBRATED,
-        /** Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses. */
+        /**
+         * Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses.
+         */
         PURGE_READ
     }
 
@@ -93,16 +106,17 @@ public class RecalDataManager {
         dataCollapsedByCovariate = null;
     }
 
-    public RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
-        if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
+    public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
+        if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
             data = null;
             dataCollapsedReadGroup = new NestedHashMap();
             dataCollapsedQualityScore = new NestedHashMap();
             dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
-            for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
-                dataCollapsedByCovariate.add( new NestedHashMap() );
+            for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
+                dataCollapsedByCovariate.add(new NestedHashMap());
             }
-        } else {
+        }
+        else {
             data = new NestedHashMap();
             dataCollapsedReadGroup = null;
             dataCollapsedQualityScore = null;
@@ -112,54 +126,58 @@ public class RecalDataManager {
 
     /**
      * Add the given mapping to all of the collapsed hash tables
-     * @param key The list of comparables that is the key for this mapping
-     * @param fullDatum The RecalDatum which is the data for this mapping
+     *
+     * @param key                        The list of comparables that is the key for this mapping
+     * @param fullDatum                  The RecalDatum which is the data for this mapping
      * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
      */
-    public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN ) {
+    public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) {
 
         // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
         //data.put(key, thisDatum); // add the mapping to the main table
 
-        final int qualityScore = Integer.parseInt( key[1].toString() );
+        final int qualityScore = Integer.parseInt(key[1].toString());
         final Object[] readGroupCollapsedKey = new Object[1];
         final Object[] qualityScoreCollapsedKey = new Object[2];
         final Object[] covariateCollapsedKey = new Object[3];
         RecalDatum collapsedDatum;
 
         // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
-        if( qualityScore >= PRESERVE_QSCORES_LESS_THAN ) {
+        if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
             readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
-            collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get( readGroupCollapsedKey );
-            if( collapsedDatum == null ) {
-                dataCollapsedReadGroup.put( new RecalDatum(fullDatum), readGroupCollapsedKey );
-            } else {
-                collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
+            collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
+            if (collapsedDatum == null) {
+                dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
+            }
+            else {
+                collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
             }
         }
 
         // Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
         qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
         qualityScoreCollapsedKey[1] = key[1]; //                                    and quality score
-        collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get( qualityScoreCollapsedKey );
-        if( collapsedDatum == null ) {
-            dataCollapsedQualityScore.put( new RecalDatum(fullDatum), qualityScoreCollapsedKey );
-        } else {
-            collapsedDatum.increment( fullDatum );
+        collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
+        if (collapsedDatum == null) {
+            dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
+        }
+        else {
+            collapsedDatum.increment(fullDatum);
         }
 
         // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
-        for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
+        for (int iii = 0; iii < dataCollapsedByCovariate.size(); iii++) {
             covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
             covariateCollapsedKey[1] = key[1]; //                                    and quality score ...
             final Object theCovariateElement = key[iii + 2]; //                                        and the given covariate
-            if( theCovariateElement != null ) {
+            if (theCovariateElement != null) {
                 covariateCollapsedKey[2] = theCovariateElement;
-                collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey );
-                if( collapsedDatum == null ) {
-                    dataCollapsedByCovariate.get(iii).put( new RecalDatum(fullDatum), covariateCollapsedKey );
-                } else {
-                    collapsedDatum.increment( fullDatum );
+                collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
+                if (collapsedDatum == null) {
+                    dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
+                }
+                else {
+                    collapsedDatum.increment(fullDatum);
                 }
             }
         }
@@ -167,150 +185,162 @@ public class RecalDataManager {
 
     /**
      * Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
-     *   that will be used in the sequential calculation in TableRecalibrationWalker
+     * that will be used in the sequential calculation in TableRecalibrationWalker
+     *
      * @param smoothing The smoothing parameter that goes into empirical quality score calculation
-     * @param maxQual At which value to cap the quality scores
+     * @param maxQual   At which value to cap the quality scores
      */
-    public final void generateEmpiricalQualities( final int smoothing, final int maxQual ) {
+    public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
 
         recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual);
         recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
-        for( NestedHashMap map : dataCollapsedByCovariate ) {
+        for (NestedHashMap map : dataCollapsedByCovariate) {
             recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
             checkForSingletons(map.data);
         }
     }
 
-    private void recursivelyGenerateEmpiricalQualities( final Map data, final int smoothing, final int maxQual ) {
+    private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) {
 
-        for( Object comp : data.keySet() ) {
+        for (Object comp : data.keySet()) {
             final Object val = data.get(comp);
-            if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
-                ((RecalDatum)val).calcCombinedEmpiricalQuality(smoothing, maxQual);
-            } else { // Another layer in the nested hash map
-                recursivelyGenerateEmpiricalQualities( (Map) val, smoothing, maxQual);
+            if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
+                ((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual);
+            }
+            else { // Another layer in the nested hash map
+                recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual);
             }
         }
     }
 
-    private void checkForSingletons( final Map data ) {
+    private void checkForSingletons(final Map data) {
         // todo -- this looks like it's better just as a data.valueSet() call?
-        for( Object comp : data.keySet() ) {
+        for (Object comp : data.keySet()) {
             final Object val = data.get(comp);
-            if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
-                if( data.keySet().size() == 1) {
+            if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
+                if (data.keySet().size() == 1) {
                     data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
-                                                                                                    // in a previous step of the sequential calculation model
+                    // in a previous step of the sequential calculation model
                 }
-            } else { // Another layer in the nested hash map
-                checkForSingletons( (Map) val );
+            }
+            else { // Another layer in the nested hash map
+                checkForSingletons((Map) val);
             }
         }
     }
 
     /**
      * Get the appropriate collapsed table out of the set of all the tables held by this Object
+     *
      * @param covariate Which covariate indexes the desired collapsed HashMap
      * @return The desired collapsed HashMap
      */
-    public final NestedHashMap getCollapsedTable( final int covariate ) {
-        if( covariate == 0) {
+    public final NestedHashMap getCollapsedTable(final int covariate) {
+        if (covariate == 0) {
             return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
-        } else if( covariate == 1 ) {
+        }
+        else if (covariate == 1) {
             return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
-        } else {
-            return dataCollapsedByCovariate.get( covariate - 2 ); // Table where everything except read group, quality score, and given covariate has been collapsed
+        }
+        else {
+            return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
         }
     }
 
     /**
      * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
+     *
      * @param read The read to adjust
-     * @param RAC The list of shared command line arguments
+     * @param RAC  The list of shared command line arguments
      */
-    public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
-        GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord)read).getReadGroup();
+    public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
+        GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
 
         // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
-        if( readGroup == null ) {
-            if( RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
-                if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) {
+        if (readGroup == null) {
+            if (RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
+                if (!warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null) {
                     Utils.warnUser("The input .bam file contains reads with no read group. " +
-                                    "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
-                                    "First observed at read with name = " + read.getReadName() );
+                            "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
+                            "First observed at read with name = " + read.getReadName());
                     warnUserNullReadGroup = true;
                 }
                 // There is no readGroup so defaulting to these values
-                readGroup = new GATKSAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
-                readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
-                ((GATKSAMRecord)read).setReadGroup( readGroup );
-            } else {
-                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() );
+                readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
+                readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
+                ((GATKSAMRecord) read).setReadGroup(readGroup);
+            }
+            else {
+                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
             }
         }
 
-        if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
+        if (RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP)) { // Collapse all the read groups into a single common String provided by the user
             final String oldPlatform = readGroup.getPlatform();
-            readGroup = new GATKSAMReadGroupRecord( RAC.FORCE_READ_GROUP );
-            readGroup.setPlatform( oldPlatform );
-            ((GATKSAMRecord)read).setReadGroup( readGroup );
+            readGroup = new GATKSAMReadGroupRecord(RAC.FORCE_READ_GROUP);
+            readGroup.setPlatform(oldPlatform);
+            ((GATKSAMRecord) read).setReadGroup(readGroup);
         }
 
-        if( RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
-            readGroup.setPlatform( RAC.FORCE_PLATFORM );
+        if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
+            readGroup.setPlatform(RAC.FORCE_PLATFORM);
         }
 
-        if ( readGroup.getPlatform() == null ) {
-            if( RAC.DEFAULT_PLATFORM != null ) {
-                if( !warnUserNullPlatform ) {
+        if (readGroup.getPlatform() == null) {
+            if (RAC.DEFAULT_PLATFORM != null) {
+                if (!warnUserNullPlatform) {
                     Utils.warnUser("The input .bam file contains reads with no platform information. " +
-                                        "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
-                                        "First observed at read with name = " + read.getReadName() );
+                            "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
+                            "First observed at read with name = " + read.getReadName());
                     warnUserNullPlatform = true;
                 }
-                readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
-            } else {
-                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() );
+                readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
+            }
+            else {
+                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName());
             }
         }
     }
 
     /**
      * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
+     *
      * @param read The SAMRecord to parse
      */
-    public static void parseColorSpace( final SAMRecord read ) {
+    public static void parseColorSpace(final GATKSAMRecord read) {
 
         // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
-        if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
-            if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
+        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+            if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
                 final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
-                if( attr != null ) {
+                if (attr != null) {
                     byte[] colorSpace;
-                    if( attr instanceof String ) {
-                        colorSpace = ((String)attr).getBytes();
-                    } else {
+                    if (attr instanceof String) {
+                        colorSpace = ((String) attr).getBytes();
+                    }
+                    else {
                         throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
                     }
 
                     // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
                     byte[] readBases = read.getReadBases();
-                    if( read.getReadNegativeStrandFlag() ) {
-                        readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
+                    if (read.getReadNegativeStrandFlag()) {
+                        readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
                     }
                     final byte[] inconsistency = new byte[readBases.length];
                     int iii;
                     byte prevBase = colorSpace[0]; // The sentinel
-                    for( iii = 0; iii < readBases.length; iii++ ) {
-                        final byte thisBase = getNextBaseFromColor( read, prevBase, colorSpace[iii + 1] );
-                        inconsistency[iii] = (byte)( thisBase == readBases[iii] ? 0 : 1 );
+                    for (iii = 0; iii < readBases.length; iii++) {
+                        final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
+                        inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1);
                         prevBase = readBases[iii];
                     }
-                    read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency );
+                    read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
 
-                } else {
+                }
+                else {
                     throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
-                                            " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+                            " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
                 }
             }
         }
@@ -319,52 +349,57 @@ public class RecalDataManager {
     /**
      * Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
      * This method doesn't add the inconsistent tag to the read like parseColorSpace does
-     * @param read The SAMRecord to parse
+     *
+     * @param read               The SAMRecord to parse
      * @param originalQualScores The array of original quality scores to modify during the correction
-     * @param solidRecalMode Which mode of solid recalibration to apply
-     * @param refBases The reference for this read
+     * @param solidRecalMode     Which mode of solid recalibration to apply
+     * @param refBases           The reference for this read
      * @return A new array of quality scores that have been ref bias corrected
      */
-    public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases ) {
+    public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
 
         final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
-        if( attr != null ) {
+        if (attr != null) {
             byte[] colorSpace;
-            if( attr instanceof String ) {
-                colorSpace = ((String)attr).getBytes();
-            } else {
+            if (attr instanceof String) {
+                colorSpace = ((String) attr).getBytes();
+            }
+            else {
                 throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
             }
 
             // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
             byte[] readBases = read.getReadBases();
             final byte[] colorImpliedBases = readBases.clone();
-            byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray( read.getCigar(), read.getReadBases(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
-            if( read.getReadNegativeStrandFlag() ) {
-                readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
-                refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() );
+            byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
+            if (read.getReadNegativeStrandFlag()) {
+                readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
+                refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
             }
             final int[] inconsistency = new int[readBases.length];
             byte prevBase = colorSpace[0]; // The sentinel
-            for( int iii = 0; iii < readBases.length; iii++ ) {
-                final byte thisBase = getNextBaseFromColor( read, prevBase, colorSpace[iii + 1] );
+            for (int iii = 0; iii < readBases.length; iii++) {
+                final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
                 colorImpliedBases[iii] = thisBase;
-                inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 );
+                inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
                 prevBase = readBases[iii];
             }
 
             // Now that we have the inconsistency array apply the desired correction to the inconsistent bases
-            if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO ) { // Set inconsistent bases and the one before it to Q0
+            if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
                 final boolean setBaseN = false;
                 originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
-            } else if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N ) {
+            }
+            else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
                 final boolean setBaseN = true;
                 originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
-            } else if( solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
+            }
+            else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
                 solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
             }
 
-        } else {
+        }
+        else {
             throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
                     " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
         }
@@ -372,26 +407,28 @@ public class RecalDataManager {
         return originalQualScores;
     }
 
-    public static boolean checkNoCallColorSpace( final SAMRecord read ) {
-        if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
+    public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
+        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
             final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
-            if( attr != null ) {
+            if (attr != null) {
                 byte[] colorSpace;
-                if( attr instanceof String ) {
-                    colorSpace = ((String)attr).substring(1).getBytes(); // trim off the Sentinel
-                } else {
+                if (attr instanceof String) {
+                    colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
+                }
+                else {
                     throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
                 }
 
-                for( byte color : colorSpace ) {
-                    if( color != (byte)'0' && color != (byte)'1' && color != (byte)'2' && color != (byte)'3' ) {
+                for (byte color : colorSpace) {
+                    if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
                         return true; // There is a bad color in this SOLiD read and the user wants to skip over it
                     }
                 }
 
-            } else {
+            }
+            else {
                 throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
-                                        " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+                        " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
             }
         }
 
@@ -400,90 +437,105 @@ public class RecalDataManager {
 
     /**
      * Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
-     * @param read The SAMRecord to recalibrate
-     * @param readBases The bases in the read which have been RC'd if necessary
-     * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
+     *
+     * @param read               The SAMRecord to recalibrate
+     * @param readBases          The bases in the read which have been RC'd if necessary
+     * @param inconsistency      The array of 1/0 that says if this base is inconsistent with its color
      * @param originalQualScores The array of original quality scores to set to zero if needed
-     * @param refBases The reference which has been RC'd if necessary
-     * @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
+     * @param refBases           The reference which has been RC'd if necessary
+     * @param setBaseN           Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
      * @return The byte array of original quality scores some of which might have been set to zero
      */
-    private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
-                                                final byte[] refBases, final boolean setBaseN ) {
+    private static byte[] solidRecalSetToQZero(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final byte[] refBases, final boolean setBaseN) {
 
         final boolean negStrand = read.getReadNegativeStrandFlag();
-        for( int iii = 1; iii < originalQualScores.length; iii++ ) {
-            if( inconsistency[iii] == 1 ) {
-                if( readBases[iii] == refBases[iii] ) {
-                    if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
-                    else { originalQualScores[iii] = (byte)0; }
-                    if( setBaseN ) { readBases[iii] = (byte)'N'; }
+        for (int iii = 1; iii < originalQualScores.length; iii++) {
+            if (inconsistency[iii] == 1) {
+                if (readBases[iii] == refBases[iii]) {
+                    if (negStrand) {
+                        originalQualScores[originalQualScores.length - (iii + 1)] = (byte) 0;
+                    }
+                    else {
+                        originalQualScores[iii] = (byte) 0;
+                    }
+                    if (setBaseN) {
+                        readBases[iii] = (byte) 'N';
+                    }
                 }
                 // Set the prev base to Q0 as well
-                if( readBases[iii-1] == refBases[iii-1] ) {
-                    if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
-                    else { originalQualScores[iii-1] = (byte)0; }
-                    if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
+                if (readBases[iii - 1] == refBases[iii - 1]) {
+                    if (negStrand) {
+                        originalQualScores[originalQualScores.length - iii] = (byte) 0;
+                    }
+                    else {
+                        originalQualScores[iii - 1] = (byte) 0;
+                    }
+                    if (setBaseN) {
+                        readBases[iii - 1] = (byte) 'N';
+                    }
                 }
             }
         }
-        if( negStrand ) {
-            readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read
+        if (negStrand) {
+            readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
         }
-        read.setReadBases( readBases );
+        read.setReadBases(readBases);
 
         return originalQualScores;
     }
 
     /**
      * Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
-     * @param read The SAMRecord to recalibrate
-     * @param readBases The bases in the read which have been RC'd if necessary
-     * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
+     *
+     * @param read              The SAMRecord to recalibrate
+     * @param readBases         The bases in the read which have been RC'd if necessary
+     * @param inconsistency     The array of 1/0 that says if this base is inconsistent with its color
      * @param colorImpliedBases The bases implied by the color space, RC'd if necessary
-     * @param refBases The reference which has been RC'd if necessary
+     * @param refBases          The reference which has been RC'd if necessary
      */
-    private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
-                                                 final byte[] refBases) {
+    private static void solidRecalRemoveRefBias(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, final byte[] refBases) {
 
         final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
-        if( attr != null ) {
+        if (attr != null) {
             byte[] colorSpaceQuals;
-            if( attr instanceof String ) {
-                String x = (String)attr;
+            if (attr instanceof String) {
+                String x = (String) attr;
                 colorSpaceQuals = x.getBytes();
                 SAMUtils.fastqToPhred(colorSpaceQuals);
-            } else {
+            }
+            else {
                 throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
             }
 
-            for( int iii = 1; iii < inconsistency.length - 1; iii++ ) {
-                if( inconsistency[iii] == 1 ) {
-                    for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
-                        if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
-                            if( readBases[jjj] == refBases[jjj] ) {
-                                if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
-                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt( 2 );
-                                    if( rand == 0 ) { // The color implied base won the coin flip
+            for (int iii = 1; iii < inconsistency.length - 1; iii++) {
+                if (inconsistency[iii] == 1) {
+                    for (int jjj = iii - 1; jjj <= iii; jjj++) { // Correct this base and the one before it along the direction of the read
+                        if (jjj == iii || inconsistency[jjj] == 0) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
+                            if (readBases[jjj] == refBases[jjj]) {
+                                if (colorSpaceQuals[jjj] == colorSpaceQuals[jjj + 1]) { // Equal evidence for the color implied base and the reference base, so flip a coin
+                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(2);
+                                    if (rand == 0) { // The color implied base won the coin flip
                                         readBases[jjj] = colorImpliedBases[jjj];
                                     }
-                                } else {
-                                    final int maxQuality = Math.max((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]);
-                                    final int minQuality = Math.min((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]);
+                                }
+                                else {
+                                    final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
+                                    final int minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
                                     int diffInQuality = maxQuality - minQuality;
                                     int numLow = minQuality;
-                                    if( numLow == 0 ) {
+                                    if (numLow == 0) {
                                         numLow++;
                                         diffInQuality++;
                                     }
-                                    final int numHigh = Math.round( numLow * (float)Math.pow(10.0f, (float) diffInQuality / 10.0f) ); // The color with higher quality is exponentially more likely
-                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt( numLow + numHigh );
-                                    if( rand >= numLow ) { // higher q score won
-                                        if( maxQuality == (int)colorSpaceQuals[jjj] ) {
+                                    final int numHigh = Math.round(numLow * (float) Math.pow(10.0f, (float) diffInQuality / 10.0f)); // The color with higher quality is exponentially more likely
+                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(numLow + numHigh);
+                                    if (rand >= numLow) { // higher q score won
+                                        if (maxQuality == (int) colorSpaceQuals[jjj]) {
                                             readBases[jjj] = colorImpliedBases[jjj];
                                         } // else ref color had higher q score, and won out, so nothing to do here
-                                    } else { // lower q score won
-                                        if( minQuality == (int)colorSpaceQuals[jjj] ) {
+                                    }
+                                    else { // lower q score won
+                                        if (minQuality == (int) colorSpaceQuals[jjj]) {
                                             readBases[jjj] = colorImpliedBases[jjj];
                                         } // else ref color had lower q score, and won out, so nothing to do here
                                     }
@@ -494,52 +546,56 @@ public class RecalDataManager {
                 }
             }
 
-            if( read.getReadNegativeStrandFlag() ) {
-                readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read
+            if (read.getReadNegativeStrandFlag()) {
+                readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
             }
-            read.setReadBases( readBases );
-        } else { // No color space quality tag in file
+            read.setReadBases(readBases);
+        }
+        else { // No color space quality tag in file
             throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
         }
     }
 
     /**
      * Given the base and the color calculate the next base in the sequence
+     *
      * @param prevBase The base
-     * @param color The color
+     * @param color    The color
      * @return The next base in the sequence
      */
-    private static byte getNextBaseFromColor( SAMRecord read, final byte prevBase, final byte color ) {
-        switch(color) {
+    private static byte getNextBaseFromColor(GATKSAMRecord read, final byte prevBase, final byte color) {
+        switch (color) {
             case '0':
                 return prevBase;
             case '1':
-                return performColorOne( prevBase );
+                return performColorOne(prevBase);
             case '2':
-                return performColorTwo( prevBase );
+                return performColorTwo(prevBase);
             case '3':
-                return performColorThree( prevBase );
+                return performColorThree(prevBase);
             default:
-                throw new UserException.MalformedBAM(read, "Unrecognized color space in SOLID read, color = " + (char)color +
-                                          " Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
+                throw new UserException.MalformedBAM(read, "Unrecognized color space in SOLID read, color = " + (char) color +
+                        " Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
         }
     }
 
     /**
      * Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality
-     * @param read The read which contains the color space to check against
+     *
+     * @param read   The read which contains the color space to check against
      * @param offset The offset in the read at which to check
      * @return Returns true if the base was inconsistent with the color space
      */
-    public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) {
+    public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
         final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
-        if( attr != null ) {
-            final byte[] inconsistency = (byte[])attr;
+        if (attr != null) {
+            final byte[] inconsistency = (byte[]) attr;
             // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
-            if( read.getReadNegativeStrandFlag() ) { // Negative direction
-                return inconsistency[inconsistency.length - offset - 1] != (byte)0;
-            } else { // Forward direction
-                return inconsistency[offset] != (byte)0;
+            if (read.getReadNegativeStrandFlag()) { // Negative direction
+                return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
+            }
+            else { // Forward direction
+                return inconsistency[offset] != (byte) 0;
             }
 
             // This block of code is for if you want to check both the offset and the next base for color space inconsistency
@@ -557,7 +613,8 @@ public class RecalDataManager {
             //    }
             //}
 
-        } else { // No inconsistency array, so nothing is inconsistent
+        }
+        else { // No inconsistency array, so nothing is inconsistent
             return false;
         }
     }
@@ -566,33 +623,32 @@ public class RecalDataManager {
      * Computes all requested covariates for every offset in the given read
      * by calling covariate.getValues(..).
      *
-     * @param gatkRead The read for which to compute covariate values.
+     * @param gatkRead            The read for which to compute covariate values.
      * @param requestedCovariates The list of requested covariates.
      * @return An array of covariate values where result[i][j] is the covariate
-     * value for the ith position in the read and the jth covariate in
-     * reqeustedCovariates list.
+     *         value for the ith position in the read and the jth covariate in
+     *         reqeustedCovariates list.
      */
-     public static Comparable[][] computeCovariates( final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType ) {
-         //compute all covariates for this read
-         final List<Covariate> requestedCovariatesRef = requestedCovariates;
-         final int numRequestedCovariates = requestedCovariatesRef.size();
-         final int readLength = gatkRead.getReadLength();
+    public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
+        //compute all covariates for this read
+        final int numRequestedCovariates = requestedCovariates.size();
+        final int readLength = gatkRead.getReadLength();
 
-         final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
-         final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
+        final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
+        final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
 
-         // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
-         for( int i = 0; i < numRequestedCovariates; i++ ) {
-             requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder, modelType );
-             for(int j = 0; j < readLength; j++) {
-                 //copy values into a 2D array that allows all covar types to be extracted at once for
-                 //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
-                 covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
-             }
-         }
+        // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
+        for (int i = 0; i < numRequestedCovariates; i++) {
+            requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
+            for (int j = 0; j < readLength; j++) {
+                //copy values into a 2D array that allows all covar types to be extracted at once for
+                //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
+                covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
+            }
+        }
 
-         return covariateValues_offset_x_covar;
-     }
+        return covariateValues_offset_x_covar;
+    }
 
     /**
      * Perform a ceratin transversion (A <-> C or G <-> T) on the base.
@@ -603,14 +659,19 @@ public class RecalDataManager {
     private static byte performColorOne(byte base) {
         switch (base) {
             case 'A':
-            case 'a': return 'C';
+            case 'a':
+                return 'C';
             case 'C':
-            case 'c': return 'A';
+            case 'c':
+                return 'A';
             case 'G':
-            case 'g': return 'T';
+            case 'g':
+                return 'T';
             case 'T':
-            case 't': return 'G';
-            default: return base;
+            case 't':
+                return 'G';
+            default:
+                return base;
         }
     }
 
@@ -623,14 +684,19 @@ public class RecalDataManager {
     private static byte performColorTwo(byte base) {
         switch (base) {
             case 'A':
-            case 'a': return 'G';
+            case 'a':
+                return 'G';
             case 'C':
-            case 'c': return 'T';
+            case 'c':
+                return 'T';
             case 'G':
-            case 'g': return 'A';
+            case 'g':
+                return 'A';
             case 'T':
-            case 't': return 'C';
-            default: return base;
+            case 't':
+                return 'C';
+            default:
+                return base;
         }
     }
 
@@ -643,14 +709,19 @@ public class RecalDataManager {
     private static byte performColorThree(byte base) {
         switch (base) {
             case 'A':
-            case 'a': return 'T';
+            case 'a':
+                return 'T';
             case 'C':
-            case 'c': return 'G';
+            case 'c':
+                return 'G';
             case 'G':
-            case 'g': return 'C';
+            case 'g':
+                return 'C';
             case 'T':
-            case 't': return 'A';
-            default: return base;
+            case 't':
+                return 'A';
+            default:
+                return base;
         }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
index 75de84cb4..ffdb0cca7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
@@ -43,36 +43,36 @@ public class RecalibrationArgumentCollection {
     // Shared Command Line Arguments
     //////////////////////////////////
     @Hidden
-    @Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
+    @Argument(fullName = "default_read_group", shortName = "dRG", required = false, doc = "If a read has no read group then default to the provided String.")
     public String DEFAULT_READ_GROUP = null;
     @Hidden
-    @Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
+    @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
     public String DEFAULT_PLATFORM = null;
     @Hidden
-    @Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
+    @Argument(fullName = "force_read_group", shortName = "fRG", required = false, doc = "If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
     public String FORCE_READ_GROUP = null;
     @Hidden
-    @Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
+    @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
     public String FORCE_PLATFORM = null;
     @Hidden
-    @Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
+    @Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
     public int WINDOW_SIZE = 5;
 
     /**
      * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
      */
     @Hidden
-    @Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
+    @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
     public int HOMOPOLYMER_NBACK = 7;
     @Hidden
-    @Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
+    @Argument(fullName = "exception_if_no_tile", shortName = "throwTileException", doc = "If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required = false)
     public boolean EXCEPTION_IF_NO_TILE = false;
 
     /**
      * CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
      * reads which have had the reference inserted because of color space inconsistencies.
      */
-    @Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
+    @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
     public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
 
     /**
@@ -80,6 +80,13 @@ public class RecalibrationArgumentCollection {
      * no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in
      * their color space tag can not be recalibrated.
      */
-    @Argument(fullName = "solid_nocall_strategy", shortName="solid_nocall_strategy", doc="Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required=false)
+    @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
     public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
+
+    /**
+     * The context covariate will use a context of this size to calculate it's covariate value
+     */
+    @Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
+    int CONTEXT_SIZE = 8;
+
 }

From 5961868a7fe4463549fdf43045315f962510e1bd Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Mon, 6 Feb 2012 22:47:27 -0500
Subject: [PATCH 19/67] fixup for BQSR (HC integration tests)

In the new BQSR implementation, covariates do depend on the RecalibrationArgumentCollection.
---
 .../recalibration/RecalibrationArgumentCollection.java        | 2 +-
 .../sting/utils/recalibration/BaseRecalibration.java          | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
index ffdb0cca7..7f3035f1e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
@@ -87,6 +87,6 @@ public class RecalibrationArgumentCollection {
      * The context covariate will use a context of this size to calculate it's covariate value
      */
     @Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
-    int CONTEXT_SIZE = 8;
+    public int CONTEXT_SIZE = 8;
 
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index ce52f09a2..75d4b1e17 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.recalibration;
 import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
 import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
 import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
+import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
 import org.broadinstitute.sting.utils.collections.NestedHashMap;
@@ -121,8 +122,9 @@ public class BaseRecalibration {
                         final boolean createCollapsedTables = true;
 
                         // Initialize any covariate member variables using the shared argument collection
+                        RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
                         for( Covariate cov : requestedCovariates ) {
-                            cov.initialize( null ); // BUGBUG: do any of the used covariates actually need the RecalibrationArgumentCollection?
+                            cov.initialize( RAC );
                         }
                         // Initialize the data hashMaps
                         dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );

From 9d1a19bbaab27a419efaa8b906084378961f4af0 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 6 Feb 2012 22:49:29 -0500
Subject: [PATCH 20/67] Multi-allelic indels were not being printed out
 correctly in VariantsToTable; fixed.

---
 .../walkers/variantutils/VariantsToTable.java   | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index 4b3aa4864..e43d54e14 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -272,12 +272,11 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
         getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } });
         getters.put("REF", new Getter() {
             public String get(VariantContext vc) {
-                String x = "";
-                if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) {
-                    Byte refByte = vc.getReferenceBaseForIndel();
-                    x=x+new String(new byte[]{refByte});
-                }
-                return x+vc.getReference().getDisplayString();
+                StringBuilder x = new StringBuilder();
+                if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
+                    x.append((char)vc.getReferenceBaseForIndel().byteValue());
+                x.append(vc.getReference().getDisplayString());
+                return x.toString();
             }
         });
         getters.put("ALT", new Getter() {
@@ -285,13 +284,11 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
                 StringBuilder x = new StringBuilder();
                 int n = vc.getAlternateAlleles().size();
                 if ( n == 0 ) return ".";
-                if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) {
-                    Byte refByte = vc.getReferenceBaseForIndel();
-                    x.append(new String(new byte[]{refByte}));
-                }
 
                 for ( int i = 0; i < n; i++ ) {
                     if ( i != 0 ) x.append(",");
+                    if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
+                        x.append((char)vc.getReferenceBaseForIndel().byteValue());
                     x.append(vc.getAlternateAllele(i).getDisplayString());
                 }
                 return x.toString();

From 718da7757e75a2367da8b1e6351ceef60bf76c47 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 7 Feb 2012 13:15:58 -0500
Subject: [PATCH 22/67] Fixes to ValidateVariants as per GS post: ref base of
 mixed alleles were sometimes wrong, error print out of bad ACs was throwing a
 RuntimeException, don't validate ACs if there are no genotypes.

---
 .../gatk/walkers/variantutils/ValidateVariants.java   | 11 +++++------
 .../sting/utils/variantcontext/VariantContext.java    |  8 +++++++-
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
index fdfca982c..530258fe0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java
@@ -128,13 +128,13 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
 
         // get the true reference allele
         Allele reportedRefAllele = vc.getReference();
-        Allele observedRefAllele;
+        Allele observedRefAllele = null;
         // insertions
         if ( vc.isSimpleInsertion() ) {
             observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
         }
         // deletions
-        else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) {
+        else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
             // we can't validate arbitrarily long deletions
             if ( reportedRefAllele.length() > 100 ) {
                 logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
@@ -143,16 +143,15 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
 
             // deletions are associated with the (position of) the last (preceding) non-deleted base;
             // hence to get actually deleted bases we need offset = 1
-            int offset = 1 ;
-            if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
+            int offset = vc.isMNP() ? 0 : 1;
             byte[] refBytes = ref.getBases();
             byte[] trueRef = new byte[reportedRefAllele.length()];
             for (int i = 0; i < reportedRefAllele.length(); i++)
                 trueRef[i] = refBytes[i+offset];
             observedRefAllele = Allele.create(trueRef, true);
         }
-        // SNPs, etc.
-        else {
+        // SNPs, etc. but not mixed types because they are too difficult
+        else if ( !vc.isMixed() ) {
             byte[] refByte = new byte[1];
             refByte[0] = ref.getBase();
             observedRefAllele = Allele.create(refByte, true);
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 247e412dd..27721be95 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -920,6 +920,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
     }
 
     public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
+        if ( reference == null )
+            return;
+
         // don't validate if we're a complex event
         if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
             throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
@@ -963,6 +966,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
     }
 
     public void validateChromosomeCounts() {
+        if ( !hasGenotypes() )
+            return;
+        
         // AN
         if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
             int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
@@ -993,7 +999,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
                     throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.size(), observedACs.size()));
                 for (int i = 0; i < observedACs.size(); i++) {
                     if ( Integer.valueOf(reportedACs.get(i).toString()) != observedACs.get(i) )
-                        throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
+                        throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
                 }
             } else {
                 if ( observedACs.size() != 1 )

From 717cd4b912df8787deee2f9e6ec61d26bf05ea98 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 7 Feb 2012 13:30:54 -0500
Subject: [PATCH 23/67] Document -L unmapped

---
 .../sting/gatk/arguments/GATKArgumentCollection.java             | 1 +
 1 file changed, 1 insertion(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index b3a1e2488..8ec707801 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -75,6 +75,7 @@ public class GATKArgumentCollection {
      * Using this option one can instruct the GATK engine to traverse over only part of the genome.  This argument can be specified multiple times.
      * One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
      * Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
+     * To specify the completely unmapped reads in the BAM file (i.e. those without a reference contig) use -L unmapped.
      */
     @Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
     public List<IntervalBinding<Feature>> intervals = null;

From 0d3ea0401c0afb82487a1c2018750350fad790ca Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 7 Feb 2012 13:22:46 -0500
Subject: [PATCH 24/67] BQSR Parameter cleanup

   * get rid of 320C argument that nobody uses.
   * get rid of DEFAULT_READ_GROUP parameter and functionality (later to become an engine argument).
---
 .../recalibration/CountCovariatesWalker.java  | 278 +++++++++---------
 .../recalibration/RecalDataManager.java       |  26 --
 .../RecalibrationArgumentCollection.java      |  22 +-
 .../TableRecalibrationWalker.java             | 247 ++++++++--------
 4 files changed, 280 insertions(+), 293 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index 4e3d4048b..626460be6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -77,20 +77,20 @@ import java.util.Map;
  * <h2>Output</h2>
  * <p>
  * A recalibration table file in CSV format that is used by the TableRecalibration walker.
- * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.  
+ * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.
  *
- * The first 20 lines of such a file is shown below.  
+ * The first 20 lines of such a file is shown below.
  * * The file begins with a series of comment lines describing:
  * ** The number of counted loci
  * ** The number of counted bases
  * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
- * 
- * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.  
+ *
+ * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
  *
  * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
- * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of 
+ * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of
  * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
- * 
+ *
  * <pre>
  * # Counted Sites    19451059
  * # Counted Bases    56582018
@@ -129,13 +129,14 @@ import java.util.Map;
  *   -cov DinucCovariate \
  *   -recalFile my_reads.recal_data.csv
  * </pre>
- *
  */
 
 @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
-@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
-@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
-@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
+@By(DataSource.READS) // Only look at covered loci, not every loci of the reference file
+@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})
+// Filter out all reads with zero or unavailable mapping quality
+@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+// This walker requires both -I input.bam and -R reference.fasta
 @PartitionBy(PartitionType.LOCUS)
 public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
 
@@ -149,7 +150,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
     /////////////////////////////
     // Shared Arguments
     /////////////////////////////
-    @ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
+    @ArgumentCollection
+    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
 
     /////////////////////////////
     // Command Line Arguments
@@ -160,7 +162,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      * for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
      * Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
      */
-    @Input(fullName="knownSites", shortName = "knownSites", doc="A database of known polymorphic sites to skip over in the recalibration algorithm", required=false)
+    @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
     public List<RodBinding<Feature>> knownSites = Collections.emptyList();
 
     /**
@@ -169,31 +171,31 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
      * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
      */
-    @Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file")
+    @Output(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the output covariates table recalibration file")
     @Gather(CountCovariatesGatherer.class)
     public PrintStream RECAL_FILE;
 
-    @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
+    @Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
     private boolean LIST_ONLY = false;
 
     /**
      * See the -list argument to view available covariates.
      */
-    @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
+    @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false)
     private String[] COVARIATES = null;
-    @Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)
+    @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false)
     private boolean USE_STANDARD_COVARIATES = false;
 
     /////////////////////////////
     // Debugging-only Arguments
     /////////////////////////////
-    @Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
+    @Argument(fullName = "dont_sort_output", shortName = "unsorted", required = false, doc = "If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
     private boolean DONT_SORT_OUTPUT = false;
 
     /**
      * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
      */
-    @Argument(fullName="run_without_dbsnp_potentially_ruining_quality", shortName="run_without_dbsnp_potentially_ruining_quality", required=false, doc="If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
+    @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
     private boolean RUN_WITHOUT_DBSNP = false;
 
     /////////////////////////////
@@ -217,6 +219,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
         /**
          * Adds the values of other to this, returning this
+         *
          * @param other
          * @return this object
          */
@@ -247,53 +250,55 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      */
     public void initialize() {
 
-        if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
-        if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
+        if (RAC.FORCE_PLATFORM != null) {
+            RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
+        }
 
         // Get a list of all available covariates
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>( Covariate.class ).getPlugins();
-        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>( RequiredCovariate.class ).getPlugins();
-        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>( StandardCovariate.class ).getPlugins();
+        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
+        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
+        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
 
         // Print and exit if that's what was requested
-        if ( LIST_ONLY ) {
-            logger.info( "Available covariates:" );
-            for( Class<?> covClass : covariateClasses ) {
-                logger.info( covClass.getSimpleName() );
+        if (LIST_ONLY) {
+            logger.info("Available covariates:");
+            for (Class<?> covClass : covariateClasses) {
+                logger.info(covClass.getSimpleName());
             }
             logger.info("");
 
-            System.exit( 0 ); // Early exit here because user requested it
+            System.exit(0); // Early exit here because user requested it
         }
 
         // Warn the user if no dbSNP file or other variant mask was specified
-        if( knownSites.isEmpty() && !RUN_WITHOUT_DBSNP ) {
+        if (knownSites.isEmpty() && !RUN_WITHOUT_DBSNP) {
             throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
         }
 
         // Initialize the requested covariates by parsing the -cov argument
         // First add the required covariates
-        if( requiredClasses.size() == 2) { // readGroup and reported quality score
-            requestedCovariates.add( new ReadGroupCovariate() ); // Order is important here
-            requestedCovariates.add( new QualityScoreCovariate() );
-        } else {
+        if (requiredClasses.size() == 2) { // readGroup and reported quality score
+            requestedCovariates.add(new ReadGroupCovariate()); // Order is important here
+            requestedCovariates.add(new QualityScoreCovariate());
+        }
+        else {
             throw new UserException.CommandLineException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order.");
         }
         // Next add the standard covariates if -standard was specified by the user
-        if( USE_STANDARD_COVARIATES ) {
+        if (USE_STANDARD_COVARIATES) {
             // We want the standard covariates to appear in a consistent order but the packageUtils method gives a random order
             // A list of Classes can't be sorted, but a list of Class names can be
             final List<String> standardClassNames = new ArrayList<String>();
-            for( Class<?> covClass : standardClasses ) {
-                standardClassNames.add( covClass.getName() );
+            for (Class<?> covClass : standardClasses) {
+                standardClassNames.add(covClass.getName());
             }
             Collections.sort(standardClassNames); // Sort the list of class names
-            for( String className : standardClassNames ) {
-                for( Class<?> covClass : standardClasses ) { // Find the class that matches this class name
-                    if( covClass.getName().equals( className ) ) {
+            for (String className : standardClassNames) {
+                for (Class<?> covClass : standardClasses) { // Find the class that matches this class name
+                    if (covClass.getName().equals(className)) {
                         try {
-                            final Covariate covariate = (Covariate)covClass.newInstance();
-                            requestedCovariates.add( covariate );
+                            final Covariate covariate = (Covariate) covClass.newInstance();
+                            requestedCovariates.add(covariate);
                         } catch (Exception e) {
                             throw new DynamicClassResolutionException(covClass, e);
                         }
@@ -302,17 +307,17 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
             }
         }
         // Finally parse the -cov arguments that were provided, skipping over the ones already specified
-        if( COVARIATES != null ) {
-            for( String requestedCovariateString : COVARIATES ) {
+        if (COVARIATES != null) {
+            for (String requestedCovariateString : COVARIATES) {
                 boolean foundClass = false;
-                for( Class<?> covClass : covariateClasses ) {
-                    if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
+                for (Class<?> covClass : covariateClasses) {
+                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class
                         foundClass = true;
-                        if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) {
+                        if (!requiredClasses.contains(covClass) && (!USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
                             try {
                                 // Now that we've found a matching class, try to instantiate it
-                                final Covariate covariate = (Covariate)covClass.newInstance();
-                                requestedCovariates.add( covariate );
+                                final Covariate covariate = (Covariate) covClass.newInstance();
+                                requestedCovariates.add(covariate);
                             } catch (Exception e) {
                                 throw new DynamicClassResolutionException(covClass, e);
                             }
@@ -320,20 +325,19 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
                     }
                 }
 
-                if( !foundClass ) {
-                    throw new UserException.CommandLineException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." );
+                if (!foundClass) {
+                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
                 }
             }
         }
 
-        logger.info( "The covariates being used here: " );
-        for( Covariate cov : requestedCovariates ) {
-            logger.info( "\t" + cov.getClass().getSimpleName() );
-            cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
+        logger.info("The covariates being used here: ");
+        for (Covariate cov : requestedCovariates) {
+            logger.info("\t" + cov.getClass().getSimpleName());
+            cov.initialize(RAC); // Initialize any covariate member variables using the shared argument collection
         }
     }
 
-
     //---------------------------------------------------------------------------------------------------------------
     //
     // map
@@ -342,62 +346,63 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
     /**
      * For each read at this locus get the various covariate values and increment that location in the map based on
-     *   whether or not the base matches the reference at this particular location
+     * whether or not the base matches the reference at this particular location
+     *
      * @param tracker The reference metadata tracker
-     * @param ref The reference context
+     * @param ref     The reference context
      * @param context The alignment context
      * @return Returns 1, but this value isn't used in the reduce step
      */
-    public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
+    public CountedData map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
         // Only use data from non-dbsnp sites
         // Assume every mismatch at a non-dbsnp site is indicative of poor quality
         CountedData counter = new CountedData();
-        if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
+        if (tracker.getValues(knownSites).size() == 0) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
             // For each read at this locus
-            for( final PileupElement p : context.getBasePileup() ) {
+            for (final PileupElement p : context.getBasePileup()) {
                 final GATKSAMRecord gatkRead = p.getRead();
                 int offset = p.getOffset();
 
-                if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
+                if (gatkRead.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE)) {
                     continue;
                 }
 
-                if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
-                {
-                    gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true );
-                    RecalDataManager.parseSAMRecord( gatkRead, RAC );
+                if (!gatkRead.containsTemporaryAttribute(SEEN_ATTRIBUTE)) {
+                    gatkRead.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
+                    RecalDataManager.parseSAMRecord(gatkRead, RAC);
 
                     // Skip over reads with no calls in the color space if the user requested it
-                    if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
-                        gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
+                    if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace(gatkRead)) {
+                        gatkRead.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
                         continue;
                     }
 
-                    RecalDataManager.parseColorSpace( gatkRead );
-                    gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
-                            RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
+                    RecalDataManager.parseColorSpace(gatkRead);
+                    gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
                 }
 
                 // Skip this position if base quality is zero
-                if( gatkRead.getBaseQualities()[offset] > 0 ) {
+                if (gatkRead.getBaseQualities()[offset] > 0) {
 
                     byte[] bases = gatkRead.getReadBases();
                     byte refBase = ref.getBase();
 
                     // Skip if this base is an 'N' or etc.
-                    if( BaseUtils.isRegularBase( bases[offset] ) ) {
+                    if (BaseUtils.isRegularBase(bases[offset])) {
 
                         // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
-                        if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
-                            !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
+                        if (!gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
+                                !RecalDataManager.isInconsistentColorSpace(gatkRead, offset)) {
 
                             // This base finally passed all the checks for a good base, so add it to the big data hashmap
-                            updateDataFromRead( counter, gatkRead, offset, refBase );
+                            updateDataFromRead(counter, gatkRead, offset, refBase);
 
-                        } else { // calculate SOLID reference insertion rate
-                            if( refBase == bases[offset] ) {
+                        }
+                        else { // calculate SOLID reference insertion rate
+                            if (refBase == bases[offset]) {
                                 counter.solidInsertedReferenceBases++;
-                            } else {
+                            }
+                            else {
                                 counter.otherColorSpaceInconsistency++;
                             }
                         }
@@ -405,7 +410,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
                 }
             }
             counter.countedSites++;
-        } else { // We skipped over the dbSNP site, and we are only processing every Nth locus
+        }
+        else { // We skipped over the dbSNP site, and we are only processing every Nth locus
             counter.skippedSites++;
             updateMismatchCounts(counter, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
         }
@@ -413,7 +419,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
         return counter;
     }
 
-   /**
+    /**
      * Update the mismatch / total_base counts for a given class of loci.
      *
      * @param counter The CountedData to be updated
@@ -421,13 +427,13 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      * @param refBase The reference base
      */
     private static void updateMismatchCounts(CountedData counter, final AlignmentContext context, final byte refBase) {
-        for( PileupElement p : context.getBasePileup() ) {
+        for (PileupElement p : context.getBasePileup()) {
             final byte readBase = p.getBase();
             final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
-            final int refBaseIndex  = BaseUtils.simpleBaseToBaseIndex(refBase);
+            final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
 
-            if( readBaseIndex != -1 && refBaseIndex != -1 ) {
-                if( readBaseIndex != refBaseIndex ) {
+            if (readBaseIndex != -1 && refBaseIndex != -1) {
+                if (readBaseIndex != refBaseIndex) {
                     counter.novelCountsMM++;
                 }
                 counter.novelCountsBases++;
@@ -439,13 +445,14 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      * Major workhorse routine for this walker.
      * Loop through the list of requested covariates and pick out the value from the read, offset, and reference
      * Using the list of covariate values as a key, pick out the RecalDatum and increment,
-     *   adding one to the number of observations and potentially one to the number of mismatches
+     * adding one to the number of observations and potentially one to the number of mismatches
      * Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
-     *   because pulling things out of the SAMRecord is an expensive operation.
-     * @param counter Data structure which holds the counted bases
+     * because pulling things out of the SAMRecord is an expensive operation.
+     *
+     * @param counter  Data structure which holds the counted bases
      * @param gatkRead The SAMRecord holding all the data for this read
-     * @param offset The offset in the read for this locus
-     * @param refBase The reference base at this locus
+     * @param offset   The offset in the read for this locus
+     * @param refBase  The reference base at this locus
      */
     private void updateDataFromRead(CountedData counter, final GATKSAMRecord gatkRead, final int offset, final byte refBase) {
         final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
@@ -453,10 +460,10 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
         // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
         final NestedHashMap data = dataManager.data; //optimization - create local reference
-        RecalDatumOptimized datum = (RecalDatumOptimized) data.get( key );
-        if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
+        RecalDatumOptimized datum = (RecalDatumOptimized) data.get(key);
+        if (datum == null) { // key doesn't exist yet in the map so make a new bucket and add it
             // initialized with zeros, will be incremented at end of method
-            datum = (RecalDatumOptimized)data.put( new RecalDatumOptimized(), true, (Object[])key );
+            datum = (RecalDatumOptimized) data.put(new RecalDatumOptimized(), true, (Object[]) key);
         }
 
         // Need the bases to determine whether or not we have a mismatch
@@ -464,13 +471,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
         final long curMismatches = datum.getNumMismatches();
 
         // Add one to the number of observations and potentially one to the number of mismatches
-        datum.incrementBaseCounts( base, refBase );
+        datum.incrementBaseCounts(base, refBase);
         counter.countedBases++;
         counter.novelCountsBases++;
         counter.novelCountsMM += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
     }
 
-
     //---------------------------------------------------------------------------------------------------------------
     //
     // reduce
@@ -479,6 +485,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
     /**
      * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
+     *
      * @return returns A PrintStream created from the -recalFile filename argument specified to the walker
      */
     public CountedData reduceInit() {
@@ -487,11 +494,12 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
 
     /**
      * The Reduce method doesn't do anything for this walker.
+     *
      * @param mapped Result of the map. This value is immediately ignored.
-     * @param sum The summing CountedData used to output the CSV data
+     * @param sum    The summing CountedData used to output the CSV data
      * @return returns The sum used to output the CSV data
      */
-    public CountedData reduce( CountedData mapped, CountedData sum ) {
+    public CountedData reduce(CountedData mapped, CountedData sum) {
         // Do a dbSNP sanity check every so often
         return validatingDbsnpMismatchRate(sum.add(mapped));
     }
@@ -500,16 +508,15 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
      * Validate the dbSNP reference mismatch rates.
      */
     private CountedData validatingDbsnpMismatchRate(CountedData counter) {
-        if( ++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY ) {
+        if (++counter.lociSinceLastDbsnpCheck >= DBSNP_VALIDATION_CHECK_FREQUENCY) {
             counter.lociSinceLastDbsnpCheck = 0;
 
-            if( counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L ) {
-                final double fractionMM_novel = (double)counter.novelCountsMM / (double)counter.novelCountsBases;
-                final double fractionMM_dbsnp = (double)counter.dbSNPCountsMM / (double)counter.dbSNPCountsBases;
+            if (counter.novelCountsBases != 0L && counter.dbSNPCountsBases != 0L) {
+                final double fractionMM_novel = (double) counter.novelCountsMM / (double) counter.novelCountsBases;
+                final double fractionMM_dbsnp = (double) counter.dbSNPCountsMM / (double) counter.dbSNPCountsBases;
 
-                if( fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel ) {
-                    Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
-                            String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel) );
+                if (fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel) {
+                    Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " + String.format("[dbSNP variation rate = %.4f, novel variation rate = %.4f]", fractionMM_dbsnp, fractionMM_novel));
                     DBSNP_VALIDATION_CHECK_FREQUENCY *= 2; // Don't annoyingly output the warning message every megabase of a large file
                 }
             }
@@ -518,47 +525,50 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
         return counter;
     }
 
-    public CountedData treeReduce( CountedData sum1, CountedData sum2 ) {
+    public CountedData treeReduce(CountedData sum1, CountedData sum2) {
         return validatingDbsnpMismatchRate(sum1.add(sum2));
     }
 
     /**
      * Write out the full data hashmap to disk in CSV format
+     *
      * @param sum The CountedData to write out to RECAL_FILE
      */
-    public void onTraversalDone( CountedData sum ) {
-        logger.info( "Writing raw recalibration data..." );
-        if( sum.countedBases == 0L ) {
+    public void onTraversalDone(CountedData sum) {
+        logger.info("Writing raw recalibration data...");
+        if (sum.countedBases == 0L) {
             throw new UserException.BadInput("Could not find any usable data in the input BAM file(s).");
         }
-        outputToCSV( sum, RECAL_FILE );
-        logger.info( "...done!" );
+        outputToCSV(sum, RECAL_FILE);
+        logger.info("...done!");
     }
 
     /**
      * For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
+     *
      * @param recalTableStream The PrintStream to write out to
      */
-    private void outputToCSV( CountedData sum, final PrintStream recalTableStream ) {
+    private void outputToCSV(CountedData sum, final PrintStream recalTableStream) {
         recalTableStream.printf("# Counted Sites    %d%n", sum.countedSites);
         recalTableStream.printf("# Counted Bases    %d%n", sum.countedBases);
         recalTableStream.printf("# Skipped Sites    %d%n", sum.skippedSites);
-        recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)sum.countedSites / sum.skippedSites);
+        recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double) sum.countedSites / sum.skippedSites);
 
-        if( sum.solidInsertedReferenceBases != 0 ) {
+        if (sum.solidInsertedReferenceBases != 0) {
             recalTableStream.printf("# Fraction SOLiD inserted reference 1 / %.0f bases%n", (double) sum.countedBases / sum.solidInsertedReferenceBases);
             recalTableStream.printf("# Fraction other color space inconsistencies 1 / %.0f bases%n", (double) sum.countedBases / sum.otherColorSpaceInconsistency);
         }
 
         // Output header saying which covariates were used and in what order
-        for( Covariate cov : requestedCovariates ) {
-            recalTableStream.print( cov.getClass().getSimpleName().split("Covariate")[0] + "," );
+        for (Covariate cov : requestedCovariates) {
+            recalTableStream.print(cov.getClass().getSimpleName().split("Covariate")[0] + ",");
         }
         recalTableStream.println("nObservations,nMismatches,Qempirical");
 
-        if( DONT_SORT_OUTPUT ) {
+        if (DONT_SORT_OUTPUT) {
             printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
-        } else {
+        }
+        else {
             printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
         }
 
@@ -566,45 +576,47 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
         recalTableStream.println(TableRecalibrationWalker.EOF_MARKER);
     }
 
-    private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
+    private void printMappingsSorted(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
         final ArrayList<Comparable> keyList = new ArrayList<Comparable>();
-        for( Object comp : data.keySet() ) {
+        for (Object comp : data.keySet()) {
             keyList.add((Comparable) comp);
         }
 
         Collections.sort(keyList);
 
-        for( Comparable comp : keyList ) {
+        for (Comparable comp : keyList) {
             key[curPos] = comp;
             final Object val = data.get(comp);
-            if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
+            if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
                 // For each Covariate in the key
-                for( Object compToPrint : key ) {
+                for (Object compToPrint : key) {
                     // Output the Covariate's value
-                    recalTableStream.print( compToPrint + "," );
+                    recalTableStream.print(compToPrint + ",");
                 }
                 // Output the RecalDatum entry
-                recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
-            } else { // Another layer in the nested hash map
-                printMappingsSorted( recalTableStream, curPos + 1, key, (Map) val );
+                recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
+            }
+            else { // Another layer in the nested hash map
+                printMappingsSorted(recalTableStream, curPos + 1, key, (Map) val);
             }
         }
     }
 
-    private void printMappings( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
-        for( Object comp : data.keySet() ) {
+    private void printMappings(final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
+        for (Object comp : data.keySet()) {
             key[curPos] = comp;
             final Object val = data.get(comp);
-            if( val instanceof RecalDatumOptimized ) { // We are at the end of the nested hash maps
+            if (val instanceof RecalDatumOptimized) { // We are at the end of the nested hash maps
                 // For each Covariate in the key
-                for( Object compToPrint : key ) {
+                for (Object compToPrint : key) {
                     // Output the Covariate's value
-                    recalTableStream.print( compToPrint + "," );
+                    recalTableStream.print(compToPrint + ",");
                 }
                 // Output the RecalDatum entry
-                recalTableStream.println( ((RecalDatumOptimized)val).outputToCSV() );
-            } else { // Another layer in the nested hash map
-                printMappings( recalTableStream, curPos + 1, key, (Map) val );
+                recalTableStream.println(((RecalDatumOptimized) val).outputToCSV());
+            }
+            else { // Another layer in the nested hash map
+                printMappings(recalTableStream, curPos + 1, key, (Map) val);
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 18b33c0e8..72c2b2829 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -256,32 +256,6 @@ public class RecalDataManager {
     public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
         GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
 
-        // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
-        if (readGroup == null) {
-            if (RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
-                if (!warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null) {
-                    Utils.warnUser("The input .bam file contains reads with no read group. " +
-                            "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
-                            "First observed at read with name = " + read.getReadName());
-                    warnUserNullReadGroup = true;
-                }
-                // There is no readGroup so defaulting to these values
-                readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
-                readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
-                ((GATKSAMRecord) read).setReadGroup(readGroup);
-            }
-            else {
-                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
-            }
-        }
-
-        if (RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP)) { // Collapse all the read groups into a single common String provided by the user
-            final String oldPlatform = readGroup.getPlatform();
-            readGroup = new GATKSAMReadGroupRecord(RAC.FORCE_READ_GROUP);
-            readGroup.setPlatform(oldPlatform);
-            ((GATKSAMRecord) read).setReadGroup(readGroup);
-        }
-
         if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
             readGroup.setPlatform(RAC.FORCE_PLATFORM);
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
index 7f3035f1e..9752b1dee 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
@@ -43,31 +43,15 @@ public class RecalibrationArgumentCollection {
     // Shared Command Line Arguments
     //////////////////////////////////
     @Hidden
-    @Argument(fullName = "default_read_group", shortName = "dRG", required = false, doc = "If a read has no read group then default to the provided String.")
-    public String DEFAULT_READ_GROUP = null;
-    @Hidden
     @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
     public String DEFAULT_PLATFORM = null;
     @Hidden
-    @Argument(fullName = "force_read_group", shortName = "fRG", required = false, doc = "If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
-    public String FORCE_READ_GROUP = null;
-    @Hidden
     @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
     public String FORCE_PLATFORM = null;
     @Hidden
     @Argument(fullName = "window_size_nqs", shortName = "nqs", doc = "The window size used by MinimumNQSCovariate for its calculation", required = false)
     public int WINDOW_SIZE = 5;
 
-    /**
-     * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
-     */
-    @Hidden
-    @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
-    public int HOMOPOLYMER_NBACK = 7;
-    @Hidden
-    @Argument(fullName = "exception_if_no_tile", shortName = "throwTileException", doc = "If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required = false)
-    public boolean EXCEPTION_IF_NO_TILE = false;
-
     /**
      * CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
      * reads which have had the reference inserted because of color space inconsistencies.
@@ -89,4 +73,10 @@ public class RecalibrationArgumentCollection {
     @Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
     public int CONTEXT_SIZE = 8;
 
+    /**
+     * This window size tells the module in how big of a neighborhood around the current base it should look for the minimum base quality score.
+     */
+    @Argument(fullName = "homopolymer_nback", shortName = "nback", doc = "The number of previous bases to look at in HomopolymerCovariate", required = false)
+    public int HOMOPOLYMER_NBACK = 7;
+
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index a8006d506..cd848cd9e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -86,12 +86,12 @@ import java.util.regex.Pattern;
  *   -o my_reads.recal.bam \
  *   -recalFile my_reads.recal_data.csv
  * </pre>
- *
  */
 
 @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
 @WalkerName("TableRecalibration")
-@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
+@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+// This walker requires -I input.bam, it also requires -R reference.fasta
 public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
 
     public static final String PROGRAM_RECORD_NAME = "GATK TableRecalibration";
@@ -99,7 +99,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
     /////////////////////////////
     // Shared Arguments
     /////////////////////////////
-    @ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
+    @ArgumentCollection
+    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
 
     /////////////////////////////
     // Command Line Arguments
@@ -110,12 +111,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
      * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
      */
-    @Input(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the input covariates table recalibration .csv file")
+    @Input(fullName = "recal_file", shortName = "recalFile", required = true, doc = "Filename for the input covariates table recalibration .csv file")
     public File RECAL_FILE = null;
     /**
      * A new bam file in which the quality scores in each read have been recalibrated. The alignment of the reads is left untouched.
      */
-    @Output(doc="The output recalibrated BAM file", required=true)
+    @Output(doc = "The output recalibrated BAM file", required = true)
     private StingSAMFileWriter OUTPUT_BAM = null;
 
     /**
@@ -126,7 +127,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      * your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases
      * are unmodified during recalibration, so they don't get inappropriately evaluated.
      */
-    @Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
+    @Argument(fullName = "preserve_qscores_less_than", shortName = "pQ", doc = "Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required = false)
     private int PRESERVE_QSCORES_LESS_THAN = 5;
 
     /**
@@ -135,37 +136,36 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      * argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example,
      * --smoothing 15 for a large amount of smoothing.
      */
-    @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
+    @Argument(fullName = "smoothing", shortName = "sm", required = false, doc = "Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
     private int SMOOTHING = 1;
 
     /**
      * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
      * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later.
      */
-    @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores")
+    @Argument(fullName = "max_quality_score", shortName = "maxQ", required = false, doc = "The integer value at which to cap the quality scores")
     private int MAX_QUALITY_SCORE = 50;
 
     /**
      * By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun
      * the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag.
      */
-    @Argument(fullName="doNotWriteOriginalQuals", shortName="noOQs", required=false, doc="If true, we will not write the original quality (OQ) tag for each read")
+    @Argument(fullName = "doNotWriteOriginalQuals", shortName = "noOQs", required = false, doc = "If true, we will not write the original quality (OQ) tag for each read")
     private boolean DO_NOT_WRITE_OQ = false;
 
     /////////////////////////////
     // Debugging-only Arguments
     /////////////////////////////
     @Hidden
-    @Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
+    @Argument(fullName = "no_pg_tag", shortName = "noPG", required = false, doc = "Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
     private boolean NO_PG_TAG = false;
     @Hidden
-    @Argument(fullName="fail_with_no_eof_marker", shortName="requireEOF", required=false, doc="If no EOF marker is present in the covariates file, exit the program with an exception.")
+    @Argument(fullName = "fail_with_no_eof_marker", shortName = "requireEOF", required = false, doc = "If no EOF marker is present in the covariates file, exit the program with an exception.")
     private boolean REQUIRE_EOF = false;
     @Hidden
-    @Argument(fullName="skipUQUpdate", shortName="skipUQUpdate", required=false, doc="If true, we will skip the UQ updating step for each read, speeding up the calculations")
+    @Argument(fullName = "skipUQUpdate", shortName = "skipUQUpdate", required = false, doc = "If true, we will skip the UQ updating step for each read, speeding up the calculations")
     private boolean skipUQUpdate = false;
 
-
     /////////////////////////////
     // Private Member Variables
     /////////////////////////////
@@ -195,8 +195,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      */
     public void initialize() {
 
-        if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
-        if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
+        if (RAC.FORCE_PLATFORM != null) {
+            RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM;
+        }
 
         // Get a list of all available covariates
         final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
@@ -205,31 +206,33 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
         boolean foundAllCovariates = false;
 
         // Read in the data from the csv file and populate the data map and covariates list
-        logger.info( "Reading in the data from input csv file..." );
+        logger.info("Reading in the data from input csv file...");
 
         boolean sawEOF = false;
         try {
-            for ( String line : new XReadLines(RECAL_FILE) ) {
+            for (String line : new XReadLines(RECAL_FILE)) {
                 lineNumber++;
-                if ( EOF_MARKER.equals(line) ) {
+                if (EOF_MARKER.equals(line)) {
                     sawEOF = true;
-                } else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() )  {
+                }
+                else if (COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
                     ; // Skip over the comment lines, (which start with '#')
                 }
                 // Read in the covariates that were used from the input file
-                else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
-                    if( foundAllCovariates ) {
-                        throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
-                    } else { // Found the covariate list in input file, loop through all of them and instantiate them
+                else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
+                    if (foundAllCovariates) {
+                        throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
+                    }
+                    else { // Found the covariate list in input file, loop through all of them and instantiate them
                         String[] vals = line.split(",");
-                        for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
+                        for (int iii = 0; iii < vals.length - 3; iii++) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
                             boolean foundClass = false;
-                            for( Class<?> covClass : classes ) {
-                                if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
+                            for (Class<?> covClass : classes) {
+                                if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
                                     foundClass = true;
                                     try {
-                                        Covariate covariate = (Covariate)covClass.newInstance();
-                                        requestedCovariates.add( covariate );
+                                        Covariate covariate = (Covariate) covClass.newInstance();
+                                        requestedCovariates.add(covariate);
                                     } catch (Exception e) {
                                         throw new DynamicClassResolutionException(covClass, e);
                                     }
@@ -237,107 +240,110 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
                                 }
                             }
 
-                            if( !foundClass ) {
-                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
+                            if (!foundClass) {
+                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
                             }
                         }
                     }
 
-                } else { // Found a line of data
-                    if( !foundAllCovariates ) {
+                }
+                else { // Found a line of data
+                    if (!foundAllCovariates) {
                         foundAllCovariates = true;
 
                         // At this point all the covariates should have been found and initialized
-                        if( requestedCovariates.size() < 2 ) {
-                            throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
+                        if (requestedCovariates.size() < 2) {
+                            throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
                         }
 
                         final boolean createCollapsedTables = true;
 
                         // Initialize any covariate member variables using the shared argument collection
-                        for( Covariate cov : requestedCovariates ) {
-                            cov.initialize( RAC );
+                        for (Covariate cov : requestedCovariates) {
+                            cov.initialize(RAC);
                         }
                         // Initialize the data hashMaps
-                        dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
+                        dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
 
                     }
                     addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
                 }
             }
 
-        } catch ( FileNotFoundException e ) {
+        } catch (FileNotFoundException e) {
             throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
-        } catch ( NumberFormatException e ) {
+        } catch (NumberFormatException e) {
             throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
         }
-        logger.info( "...done!" );
+        logger.info("...done!");
 
-        if ( !sawEOF ) {
+        if (!sawEOF) {
             final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
-            if ( REQUIRE_EOF )
+            if (REQUIRE_EOF)
                 throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
             logger.warn(errorMessage);
         }
 
-        logger.info( "The covariates being used here: " );
-        for( Covariate cov : requestedCovariates ) {
-            logger.info( "\t" + cov.getClass().getSimpleName() );
+        logger.info("The covariates being used here: ");
+        for (Covariate cov : requestedCovariates) {
+            logger.info("\t" + cov.getClass().getSimpleName());
         }
 
-        if( dataManager == null ) {
+        if (dataManager == null) {
             throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
         }
 
         // Create the tables of empirical quality scores that will be used in the sequential calculation
-        logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
-        dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
-        logger.info( "...done!" );
+        logger.info("Generating tables of empirical qualities for use in sequential calculation...");
+        dataManager.generateEmpiricalQualities(SMOOTHING, MAX_QUALITY_SCORE);
+        logger.info("...done!");
 
         // Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
         final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
-        if( !NO_PG_TAG ) {
+        if (!NO_PG_TAG) {
             final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
             final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
             try {
                 final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
                 programRecord.setProgramVersion(version);
-            } catch (MissingResourceException e) {}
+            } catch (MissingResourceException e) {
+            }
 
             StringBuffer sb = new StringBuffer();
             sb.append(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this));
             sb.append(" Covariates=[");
-            for( Covariate cov : requestedCovariates ) {
+            for (Covariate cov : requestedCovariates) {
                 sb.append(cov.getClass().getSimpleName());
                 sb.append(", ");
             }
-            sb.setCharAt(sb.length()-2, ']');
-            sb.setCharAt(sb.length()-1, ' ');
+            sb.setCharAt(sb.length() - 2, ']');
+            sb.setCharAt(sb.length() - 1, ' ');
             programRecord.setCommandLine(sb.toString());
 
             List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
-            for ( SAMProgramRecord record : oldRecords ) {
-                if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) )
+            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size() + 1);
+            for (SAMProgramRecord record : oldRecords) {
+                if (!record.getId().startsWith(PROGRAM_RECORD_NAME))
                     newRecords.add(record);
             }
             newRecords.add(programRecord);
             header.setProgramRecords(newRecords);
 
             // Write out the new header
-            OUTPUT_BAM.writeHeader( header );
+            OUTPUT_BAM.writeHeader(header);
         }
     }
 
     /**
      * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
+     *
      * @param line A line of CSV data read from the recalibration table data file
      */
     private void addCSVData(final File file, final String line) {
         final String[] vals = line.split(",");
 
         // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
-        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
+        if (vals.length != requestedCovariates.size() + 3) { // +3 because of nObservations, nMismatch, and Qempirical
             throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
                     " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
         }
@@ -345,15 +351,15 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
         final Object[] key = new Object[requestedCovariates.size()];
         Covariate cov;
         int iii;
-        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
-            cov = requestedCovariates.get( iii );
-            key[iii] = cov.getValue( vals[iii] );
+        for (iii = 0; iii < requestedCovariates.size(); iii++) {
+            cov = requestedCovariates.get(iii);
+            key[iii] = cov.getValue(vals[iii]);
         }
 
         // Create a new datum using the number of observations, number of mismatches, and reported quality score
-        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
+        final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
         // Add that datum to all the collapsed tables which will be used in the sequential calculation
-        dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
+        dataManager.addToAllTables(key, datum, PRESERVE_QSCORES_LESS_THAN);
     }
 
     //---------------------------------------------------------------------------------------------------------------
@@ -366,64 +372,63 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      * For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
      *
      * @param refBases References bases over the length of the read
-     * @param read The read to be recalibrated
+     * @param read     The read to be recalibrated
      * @return The read with quality scores replaced
      */
-    public SAMRecord map( ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker  ) {
+    public SAMRecord map(ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
 
-        if( read.getReadLength() == 0 ) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
+        if (read.getReadLength() == 0) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
             return read;
         }
 
-        RecalDataManager.parseSAMRecord( read, RAC );
+        RecalDataManager.parseSAMRecord(read, RAC);
 
         byte[] originalQuals = read.getBaseQualities();
         final byte[] recalQuals = originalQuals.clone();
 
         final String platform = read.getReadGroup().getPlatform();
-        if( platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) ) {
-            if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) ) {
-                final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
-                if( badColor ) {
+        if (platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING)) {
+            if (!(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)) {
+                final boolean badColor = RecalDataManager.checkNoCallColorSpace(read);
+                if (badColor) {
                     numReadsWithMalformedColorSpace++;
-                    if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
+                    if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
                         return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
-                    } else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
+                    }
+                    else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
                         read.setReadFailsVendorQualityCheckFlag(true);
                         return read;
                     }
                 }
             }
-            originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases() );
+            originalQuals = RecalDataManager.calcColorSpace(read, originalQuals, RAC.SOLID_RECAL_MODE, refBases == null ? null : refBases.getBases());
         }
 
         //compute all covariate values for this read
-        final Comparable[][] covariateValues_offset_x_covar =
-            RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
+        final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
 
         // For each base in the read
-        for( int offset = 0; offset < read.getReadLength(); offset++ ) {
+        for (int offset = 0; offset < read.getReadLength(); offset++) {
 
             final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
 
             Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
-            if(qualityScore == null)
-            {
-                qualityScore = performSequentialQualityCalculation( fullCovariateKey );
+            if (qualityScore == null) {
+                qualityScore = performSequentialQualityCalculation(fullCovariateKey);
                 qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
             }
 
             recalQuals[offset] = qualityScore;
         }
 
-        preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
+        preserveQScores(originalQuals, recalQuals); // Overwrite the work done if original quality score is too low
 
-        read.setBaseQualities( recalQuals ); // Overwrite old qualities with new recalibrated qualities
-        if ( !DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // Save the old qualities if the tag isn't already taken in the read
+        read.setBaseQualities(recalQuals); // Overwrite old qualities with new recalibrated qualities
+        if (!DO_NOT_WRITE_OQ && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) == null) { // Save the old qualities if the tag isn't already taken in the read
             read.setAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, SAMUtils.phredToFastq(originalQuals));
         }
 
-        if (! skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
+        if (!skipUQUpdate && refBases != null && read.getAttribute(SAMTag.UQ.name()) != null) {
             read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, refBases.getBases(), read.getAlignmentStart() - 1, false));
         }
 
@@ -440,27 +445,28 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
      *
      * Given the full recalibration table, we perform the following preprocessing steps:
      *
-     *   - calculate the global quality score shift across all data [DeltaQ]
-     *   - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
-     *      -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
-     *   - The final shift equation is:
+     * - calculate the global quality score shift across all data [DeltaQ]
+     * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
+     * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
+     * - The final shift equation is:
+     *
+     * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
      *
-     *      Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
      * @param key The list of Comparables that were calculated from the covariates
      * @return A recalibrated quality score as a byte
      */
-    private byte performSequentialQualityCalculation( final Object... key ) {
+    private byte performSequentialQualityCalculation(final Object... key) {
 
-        final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
+        final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
         final Object[] readGroupCollapsedKey = new Object[1];
         final Object[] qualityScoreCollapsedKey = new Object[2];
         final Object[] covariateCollapsedKey = new Object[3];
 
         // The global quality shift (over the read group only)
         readGroupCollapsedKey[0] = key[0];
-        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
+        final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0).get(readGroupCollapsedKey));
         double globalDeltaQ = 0.0;
-        if( globalRecalDatum != null ) {
+        if (globalRecalDatum != null) {
             final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
             final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
             globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
@@ -469,9 +475,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
         // The shift in quality between reported and empirical
         qualityScoreCollapsedKey[0] = key[0];
         qualityScoreCollapsedKey[1] = key[1];
-        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
+        final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1).get(qualityScoreCollapsedKey));
         double deltaQReported = 0.0;
-        if( qReportedRecalDatum != null ) {
+        if (qReportedRecalDatum != null) {
             final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
             deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
         }
@@ -481,17 +487,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
         double deltaQCovariateEmpirical;
         covariateCollapsedKey[0] = key[0];
         covariateCollapsedKey[1] = key[1];
-        for( int iii = 2; iii < key.length; iii++ ) {
-            covariateCollapsedKey[2] =  key[iii]; // The given covariate
-            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
-            if( covariateRecalDatum != null ) {
+        for (int iii = 2; iii < key.length; iii++) {
+            covariateCollapsedKey[2] = key[iii]; // The given covariate
+            final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii).get(covariateCollapsedKey));
+            if (covariateRecalDatum != null) {
                 deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
-                deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
+                deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
             }
         }
 
         final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
-        return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
+        return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
 
         // Verbose printouts used to validate with old recalibrator
         //if(key.contains(null)) {
@@ -508,12 +514,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
     /**
      * Loop over the list of qualities and overwrite the newly recalibrated score to be the original score if it was less than some threshold
+     *
      * @param originalQuals The list of original base quality scores
-     * @param recalQuals A list of the new recalibrated quality scores
+     * @param recalQuals    A list of the new recalibrated quality scores
      */
-    private void preserveQScores( final byte[] originalQuals, final byte[] recalQuals ) {
-        for( int iii = 0; iii < recalQuals.length; iii++ ) {
-            if( originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN ) {
+    private void preserveQScores(final byte[] originalQuals, final byte[] recalQuals) {
+        for (int iii = 0; iii < recalQuals.length; iii++) {
+            if (originalQuals[iii] < PRESERVE_QSCORES_LESS_THAN) {
                 recalQuals[iii] = originalQuals[iii];
             }
         }
@@ -527,6 +534,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
     /**
      * Start the reduce with a handle to the output bam file
+     *
      * @return A FileWriter pointing to a new bam file
      */
     public SAMFileWriter reduceInit() {
@@ -535,12 +543,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
     /**
      * Output each read to disk
-     * @param read The read to output
+     *
+     * @param read   The read to output
      * @param output The FileWriter to write the read to
      * @return The FileWriter
      */
-    public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
-        if( output != null ) {
+    public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
+        if (output != null) {
             output.addAlignment(read);
         }
         return output;
@@ -548,20 +557,22 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
 
     /**
      * Do nothing
+     *
      * @param output The SAMFileWriter that outputs the bam file
      */
     public void onTraversalDone(SAMFileWriter output) {
-        if( numReadsWithMalformedColorSpace != 0 ) {
-            if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
+        if (numReadsWithMalformedColorSpace != 0) {
+            if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) {
                 Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
-                    "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
-                    "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
-                    "These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
-            } else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
+                        "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
+                        "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
+                        "These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
+            }
+            else if (RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ) {
                 Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
-                    "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
-                    "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
-                    "These reads were completely removed from the output bam file.");
+                        "because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
+                        "for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
+                        "These reads were completely removed from the output bam file.");
 
             }
         }

From e89887cd8e69a9dc8ecdf0df120c298a6994d808 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 7 Feb 2012 18:11:53 -0500
Subject: [PATCH 25/67] laying groundwork to have insertions and deletions
 going through the system.

---
 .../recalibration/RecalDataManager.java       |  17 +-
 .../broadinstitute/sting/utils/BaseUtils.java | 282 ++++++++++--------
 2 files changed, 163 insertions(+), 136 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 72c2b2829..311e33f8a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -284,7 +285,7 @@ public class RecalDataManager {
     public static void parseColorSpace(final GATKSAMRecord read) {
 
         // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
-        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+        if (ReadUtils.isSOLiDRead(read)) {
             if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
                 final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
                 if (attr != null) {
@@ -382,7 +383,7 @@ public class RecalDataManager {
     }
 
     public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
-        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+        if (ReadUtils.isSOLiDRead(read)) {
             final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
             if (attr != null) {
                 byte[] colorSpace;
@@ -611,21 +612,17 @@ public class RecalDataManager {
         final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
         final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
 
-        // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
-        for (int i = 0; i < numRequestedCovariates; i++) {
+        for (int i = 0; i < numRequestedCovariates; i++) {                              // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
             requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
-            for (int j = 0; j < readLength; j++) {
-                //copy values into a 2D array that allows all covar types to be extracted at once for
-                //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
-                covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
-            }
+            for (int j = 0; j < readLength; j++)
+                covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];    // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
         }
 
         return covariateValues_offset_x_covar;
     }
 
     /**
-     * Perform a ceratin transversion (A <-> C or G <-> T) on the base.
+     * Perform a certain transversion (A <-> C or G <-> T) on the base.
      *
      * @param base the base [AaCcGgTt]
      * @return the transversion of the base, or the input base if it's not one of the understood ones
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 673b1524d..61812629c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -2,57 +2,59 @@ package org.broadinstitute.sting.utils;
 
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 
-
 /**
  * BaseUtils contains some basic utilities for manipulating nucleotides.
  */
 public class BaseUtils {
-    public final static byte A = (byte)'A';
-    public final static byte C = (byte)'C';
-    public final static byte G = (byte)'G';
-    public final static byte T = (byte)'T';
+    public final static byte A = (byte) 'A';
+    public final static byte C = (byte) 'C';
+    public final static byte G = (byte) 'G';
+    public final static byte T = (byte) 'T';
 
-    public final static byte N = (byte)'N';
-    public final static byte D = (byte)'D';
+    public final static byte N = (byte) 'N';
+    public final static byte D = (byte) 'D';
 
     //
     // todo -- we need a generalized base abstraction using the Base enum.
     //
-    public final static byte[] BASES = { 'A', 'C', 'G', 'T' };
-    public final static byte[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' };
+    public final static byte[] BASES = {'A', 'C', 'G', 'T'};
+    public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};
 
     public enum Base {
-        A ( 'A', 0 ),
-        C ( 'C', 1 ),
-        G ( 'G', 2 ),
-        T ( 'T', 3 );
+        A('A', 0),
+        C('C', 1),
+        G('G', 2),
+        T('T', 3);
 
         byte b;
         int index;
+
         private Base(char base, int index) {
-            this.b = (byte)base;
+            this.b = (byte) base;
             this.index = index;
         }
 
         public byte getBase() { return b; }
-        public char getBaseAsChar() { return (char)b; }
+
+        public char getBaseAsChar() { return (char) b; }
+
         public int getIndex() { return index; }
 
         public boolean sameBase(byte o) { return b == o; }
-        public boolean sameBase(char o) { return b == (byte)o; }
-        public boolean sameBase(int i)  { return index == i; }
-    }
 
+        public boolean sameBase(char o) { return b == (byte) o; }
+
+        public boolean sameBase(int i) { return index == i; }
+    }
 
     // todo -- fix me (enums?)
     public static final byte DELETION_INDEX = 4;
     public static final byte NO_CALL_INDEX = 5; // (this is 'N')
 
-    public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte)'G');
-    public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte)'C');
-    public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte)'A');
-    public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte)'T');
-
+    public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
+    public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
+    public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
+    public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
 
     /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
     // a pyrimidine to another pyrimidine nucleotide (C <-> T).
@@ -64,28 +66,31 @@ public class BaseUtils {
 
     /**
      * Returns the base substitution type of the 2 state SNP
+     *
      * @param base1
      * @param base2
      * @return
      */
-    public static BaseSubstitutionType SNPSubstitutionType( byte base1, byte base2 ) {
+    public static BaseSubstitutionType SNPSubstitutionType(byte base1, byte base2) {
         BaseSubstitutionType t = isTransition(base1, base2) ? BaseSubstitutionType.TRANSITION : BaseSubstitutionType.TRANSVERSION;
         //System.out.printf("SNPSubstitutionType( char %c, char %c ) => %s%n", base1, base2, t);
         return t;
     }
 
-    public static boolean isTransition( byte base1, byte  base2 ) {
+    public static boolean isTransition(byte base1, byte base2) {
         int b1 = simpleBaseToBaseIndex(base1);
         int b2 = simpleBaseToBaseIndex(base2);
         return b1 == 0 && b2 == 2 || b1 == 2 && b2 == 0 ||
-               b1 == 1 && b2 == 3 || b1 == 3 && b2 == 1;
+                b1 == 1 && b2 == 3 || b1 == 3 && b2 == 1;
     }
 
-    public static boolean isTransversion( byte  base1, byte  base2 ) {
-        return ! isTransition(base1, base2);
+    public static boolean isTransversion(byte base1, byte base2) {
+        return !isTransition(base1, base2);
     }
 
-    /** Private constructor.  No instantiating this class! */
+    /**
+     * Private constructor.  No instantiating this class!
+     */
     private BaseUtils() {}
 
     static public boolean basesAreEqual(byte base1, byte base2) {
@@ -96,7 +101,6 @@ public class BaseUtils {
         return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
     }
 
-
     /**
      * Converts a IUPAC nucleotide code to a pair of bases
      *
@@ -163,33 +167,37 @@ public class BaseUtils {
     /**
      * Converts a simple base to a base index
      *
-     * @param base  [AaCcGgTt]
+     * @param base [AaCcGgTt]
      * @return 0, 1, 2, 3, or -1 if the base can't be understood
      */
     static public int simpleBaseToBaseIndex(byte base) {
         switch (base) {
             case '*':               // the wildcard character counts as an A
             case 'A':
-            case 'a': return 0;
+            case 'a':
+                return 0;
 
             case 'C':
-            case 'c': return 1;
+            case 'c':
+                return 1;
 
             case 'G':
-            case 'g': return 2;
+            case 'g':
+                return 2;
 
             case 'T':
-            case 't': return 3;
+            case 't':
+                return 3;
 
-            default: return -1;
+            default:
+                return -1;
         }
     }
 
-
     /**
      * Converts a simple base to a base index
      *
-     * @param base  [AaCcGgTt]
+     * @param base [AaCcGgTt]
      * @return 0, 1, 2, 3, or -1 if the base can't be understood
      */
     @Deprecated
@@ -197,29 +205,37 @@ public class BaseUtils {
         switch (base) {
             case '*':               // the wildcard character counts as an A
             case 'A':
-            case 'a': return 0;
+            case 'a':
+                return 0;
 
             case 'C':
-            case 'c': return 1;
+            case 'c':
+                return 1;
 
             case 'G':
-            case 'g': return 2;
+            case 'g':
+                return 2;
 
             case 'T':
-            case 't': return 3;
+            case 't':
+                return 3;
 
-            default: return -1;
+            default:
+                return -1;
         }
     }
 
     static public int extendedBaseToBaseIndex(byte base) {
         switch (base) {
             case 'd':
-            case 'D': return DELETION_INDEX;
+            case 'D':
+                return DELETION_INDEX;
             case 'n':
-            case 'N': return NO_CALL_INDEX;
+            case 'N':
+                return NO_CALL_INDEX;
 
-            default: return simpleBaseToBaseIndex(base);
+            default:
+                return simpleBaseToBaseIndex(base);
         }
     }
 
@@ -232,11 +248,6 @@ public class BaseUtils {
         return simpleBaseToBaseIndex(base) != -1;
     }
 
-    @Deprecated
-    static public boolean isNBase(char base) {
-        return isNBase((byte)base);
-    }
-
     static public boolean isNBase(byte base) {
         return base == 'N' || base == 'n';
     }
@@ -244,68 +255,83 @@ public class BaseUtils {
     /**
      * Converts a base index to a simple base
      *
-     * @param baseIndex  0, 1, 2, 3
+     * @param baseIndex 0, 1, 2, 3
      * @return A, C, G, T, or '.' if the index can't be understood
      */
     static public byte baseIndexToSimpleBase(int baseIndex) {
         switch (baseIndex) {
-            case 0: return 'A';
-            case 1: return 'C';
-            case 2: return 'G';
-            case 3: return 'T';
-            default: return '.';
+            case 0:
+                return 'A';
+            case 1:
+                return 'C';
+            case 2:
+                return 'G';
+            case 3:
+                return 'T';
+            default:
+                return '.';
         }
     }
 
     @Deprecated
     static public char baseIndexToSimpleBaseAsChar(int baseIndex) {
-        return (char)baseIndexToSimpleBase(baseIndex);
+        return (char) baseIndexToSimpleBase(baseIndex);
     }
 
     /**
      * Converts a base index to a base index representing its cross-talk partner
      *
-     * @param baseIndex  0, 1, 2, 3
+     * @param baseIndex 0, 1, 2, 3
      * @return 1, 0, 3, 2, or -1 if the index can't be understood
      */
     static public int crossTalkPartnerIndex(int baseIndex) {
         switch (baseIndex) {
-            case 0: return 1; // A -> C
-            case 1: return 0; // C -> A
-            case 2: return 3; // G -> T
-            case 3: return 2; // T -> G
-            default: return -1;
+            case 0:
+                return 1; // A -> C
+            case 1:
+                return 0; // C -> A
+            case 2:
+                return 3; // G -> T
+            case 3:
+                return 2; // T -> G
+            default:
+                return -1;
         }
     }
 
     /**
      * Converts a base to the base representing its cross-talk partner
      *
-     * @param base  [AaCcGgTt]
+     * @param base [AaCcGgTt]
      * @return C, A, T, G, or '.' if the base can't be understood
      */
     @Deprecated
     static public char crossTalkPartnerBase(char base) {
-        return (char)baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
+        return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
     }
 
     /**
      * Return the complement of a base index.
      *
-     * @param baseIndex  the base index (0:A, 1:C, 2:G, 3:T)
+     * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
      * @return the complementary base index
      */
     static public byte complementIndex(int baseIndex) {
         switch (baseIndex) {
-            case 0: return 3; // a -> t
-            case 1: return 2; // c -> g
-            case 2: return 1; // g -> c
-            case 3: return 0; // t -> a
-            default: return -1; // wtf?
+            case 0:
+                return 3; // a -> t
+            case 1:
+                return 2; // c -> g
+            case 2:
+                return 1; // g -> c
+            case 3:
+                return 0; // t -> a
+            default:
+                return -1; // wtf?
         }
     }
 
-   /**
+    /**
      * Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
      *
      * @param base the base [AaCcGgTt]
@@ -314,20 +340,25 @@ public class BaseUtils {
     static public byte simpleComplement(byte base) {
         switch (base) {
             case 'A':
-            case 'a': return 'T';
+            case 'a':
+                return 'T';
             case 'C':
-            case 'c': return 'G';
+            case 'c':
+                return 'G';
             case 'G':
-            case 'g': return 'C';
+            case 'g':
+                return 'C';
             case 'T':
-            case 't': return 'A';
-            default: return base;
+            case 't':
+                return 'A';
+            default:
+                return base;
         }
     }
 
     @Deprecated
     static public char simpleComplement(char base) {
-        return (char)simpleComplement((byte)base);
+        return (char) simpleComplement((byte) base);
     }
 
     /**
@@ -349,7 +380,7 @@ public class BaseUtils {
     /**
      * Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
      *
-     * @param bases  the byte array of bases
+     * @param bases the byte array of bases
      * @return the complement of the base byte array
      */
     static public byte[] simpleComplement(byte[] bases) {
@@ -382,7 +413,7 @@ public class BaseUtils {
     /**
      * Complement a char array of bases
      *
-     * @param bases  the char array of bases
+     * @param bases the char array of bases
      * @return the complement of the base char array
      */
     @Deprecated
@@ -399,7 +430,7 @@ public class BaseUtils {
     /**
      * Reverse complement a String of bases.  Preserves ambiguous bases.
      *
-     * @param bases  the String of bases
+     * @param bases the String of bases
      * @return the reverse complement of the String
      */
     @Deprecated
@@ -407,11 +438,10 @@ public class BaseUtils {
         return new String(simpleReverseComplement(bases.getBytes()));
     }
 
-
     /**
      * Complement a String of bases.  Preserves ambiguous bases.
      *
-     * @param bases  the String of bases
+     * @param bases the String of bases
      * @return the complement of the String
      */
     @Deprecated
@@ -451,7 +481,7 @@ public class BaseUtils {
     /**
      * Returns the most common base in the basecounts array. To be used with pileup.getBaseCounts.
      *
-     * @param  baseCounts counts of a,c,g,t in order.
+     * @param baseCounts counts of a,c,g,t in order.
      * @return the most common base
      */
     static public byte mostFrequentSimpleBase(int[] baseCounts) {
@@ -461,13 +491,13 @@ public class BaseUtils {
     /**
      * For the most frequent base in the sequence, return the percentage of the read it constitutes.
      *
-     * @param sequence  the read sequence
-     * @return  the percentage of the read that's made up of the most frequent base
+     * @param sequence the read sequence
+     * @return the percentage of the read that's made up of the most frequent base
      */
     static public double mostFrequentBaseFraction(byte[] sequence) {
         int[] baseCounts = new int[4];
 
-        for ( byte base : sequence ) {
+        for (byte base : sequence) {
             int baseIndex = simpleBaseToBaseIndex(base);
 
             if (baseIndex >= 0) {
@@ -477,7 +507,7 @@ public class BaseUtils {
 
         int mostFrequentBaseIndex = mostFrequentBaseIndex(baseCounts);
 
-        return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length);
+        return ((double) baseCounts[mostFrequentBaseIndex]) / ((double) sequence.length);
     }
 
     // --------------------------------------------------------------------------------
@@ -531,50 +561,50 @@ public class BaseUtils {
     static public byte getRandomBase(char excludeBase) {
         return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
     }
-    
-    
-    /** Computes the smallest period >= minPeriod for the specified string. The period is defined as such p, 
+
+    /**
+     * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
      * that for all  i = 0... seq.length-1,  seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
-     *  The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period 
-     *  of 2 (it has a period of 4 as well), and so does
-     * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is 
+     * The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
+     * of 2 (it has a period of 4 as well), and so does
+     * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
      * the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range
      * or if specified minPeriod is greater than the sequence length.
-     *   
+     *
      * @param seq
      * @return
      */
     public static int sequencePeriod(byte[] seq, int minPeriod) {
-    	int period = ( minPeriod > seq.length ? seq.length : minPeriod );
-    	// we assume that bases [0,period-1] repeat themselves and check this assumption
-    	// until we find correct period
-    	
-    	for ( int pos = period ; pos < seq.length ; pos++ ) {
-    		
-    		int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
-    		                                                // if our current hypothesis holds, base[pos] must be the same as base[offset]
-    		
-    		if ( Character.toUpperCase( seq[pos] ) !=
-    				Character.toUpperCase( seq[offset] )
-    			) {
-    			
-    			// period we have been trying so far does not work.
-    			// two possibilities:
-    			// A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
-    			//      in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
-    		   //       so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance 
-    			//      pos will be autoincremented and we will be checking next base
-    			// B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
-    			//     hence we should first check if it matches the first base of the sequence, and to do that
-    			//     we set period to pos  (thus trying the hypothesis that bases from start up to the current one,
-    			//     non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
-    			// on the next loop re-entrance after pos is autoincremented)
-    			if ( offset == 0 ) period = pos+1;
-    			else period = pos-- ;
-    		
-    		} 
-    	}
-    	return period;
+        int period = (minPeriod > seq.length ? seq.length : minPeriod);
+        // we assume that bases [0,period-1] repeat themselves and check this assumption
+        // until we find correct period
+
+        for (int pos = period; pos < seq.length; pos++) {
+
+            int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
+            // if our current hypothesis holds, base[pos] must be the same as base[offset]
+
+            if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
+
+                // period we have been trying so far does not work.
+                // two possibilities:
+                // A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
+                //      in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
+                //       so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
+                //      pos will be autoincremented and we will be checking next base
+                // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
+                //     hence we should first check if it matches the first base of the sequence, and to do that
+                //     we set period to pos  (thus trying the hypothesis that bases from start up to the current one,
+                //     non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
+                // on the next loop re-entrance after pos is autoincremented)
+                if (offset == 0)
+                    period = pos + 1;
+                else
+                    period = pos--;
+
+            }
+        }
+        return period;
     }
 }
 

From c0c676590b49d3d384aebc716a0ac2388f9c850f Mon Sep 17 00:00:00 2001
From: Roger Zurawicki <roger@broadinstitute.org>
Date: Mon, 9 Jan 2012 23:46:48 -0500
Subject: [PATCH 26/67] First implementation of GATKReportGatherer

- Added the GATKReportGatherer
- Added private methods in GATKReport to combine Tables and Reports
- It is very conservative and it will only gather if the table columns, match.
- At the column level it uses the (redundant) row ids to add new rows. It will throw an exception if it is overwriting data.
Added the gatherer functions to CoverageByRG

Also added the scatterCount parameter in the Interval Coverage script
Made some more GATKReport methods public

The UnitTest included shows that the merging methods work
Added a getter for the PrimaryKeyName
Fixed bugs that prevented the gatherer form working

Working GATKReportGatherer
Has only the functional to addLines
The input file parser assumes that the first column is the primary key

Signed-off-by: Mauricio Carneiro <carneiro@broadinstitute.org>
---
 .../sting/gatk/report/GATKReport.java         |  65 +++--
 .../sting/gatk/report/GATKReportGatherer.java |  46 ++++
 .../sting/gatk/report/GATKReportTable.java    | 252 +++++++++++-------
 .../sting/gatk/report/GATKReportUnitTest.java | 128 +++++++--
 4 files changed, 357 insertions(+), 134 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 608b5d1d0..c0abe7450 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -24,7 +24,8 @@ public class GATKReport {
 
     /**
      * Create a new GATKReport with the contents of a GATKReport on disk.
-     * @param filename  the path to the file to load
+     *
+     * @param filename the path to the file to load
      */
     public GATKReport(String filename) {
         this(new File(filename));
@@ -32,7 +33,8 @@ public class GATKReport {
 
     /**
      * Create a new GATKReport with the contents of a GATKReport on disk.
-     * @param file  the file to load
+     *
+     * @param file the file to load
      */
     public GATKReport(File file) {
         loadReport(file);
@@ -40,7 +42,8 @@ public class GATKReport {
 
     /**
      * Load a GATKReport file from disk
-     * @param file  the file to load
+     *
+     * @param file the file to load
      */
     private void loadReport(File file) {
         try {
@@ -48,12 +51,11 @@ public class GATKReport {
 
             GATKReportTable table = null;
             String[] header = null;
-            int id = 0;
             GATKReportVersion version = null;
             List<Integer> columnStarts = null;
 
             String line;
-            while ( (line = reader.readLine()) != null ) {
+            while ((line = reader.readLine()) != null) {
 
                 if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
 
@@ -71,7 +73,7 @@ public class GATKReport {
 
                     header = null;
                     columnStarts = null;
-                } else if ( line.trim().isEmpty() ) {
+                } else if (line.trim().isEmpty()) {
                     // do nothing
                 } else {
                     if (table != null) {
@@ -97,19 +99,22 @@ public class GATKReport {
                         if (header == null) {
                             header = splitLine;
 
-                            table.addPrimaryKey("id", false);
-
-                            for ( String columnName : header ) {
-                                table.addColumn(columnName, "");
+                            // Set the first column as the primary key
+                            table.addPrimaryKey(header[0]);
+                            // Set every other column as column
+                            for (int i = 1; i < header.length; i++) {
+                                table.addColumn(header[i], "");
                             }
 
-                            id = 0;
                         } else {
-                            for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
-                                table.set(id, header[columnIndex], splitLine[columnIndex]);
+                            //Get primary key Value from the current line array
+                            String primaryKey = splitLine[0];
+                            //Input all the remaining values
+                            for (int columnIndex = 1; columnIndex < header.length; columnIndex++) {
+                                table.set(primaryKey, header[columnIndex], splitLine[columnIndex]);
                             }
 
-                            id++;
+
                         }
                     }
                 }
@@ -124,8 +129,8 @@ public class GATKReport {
     /**
      * Add a new table to the collection
      *
-     * @param tableName  the name of the table
-     * @param tableDescription  the description of the table
+     * @param tableName        the name of the table
+     * @param tableDescription the description of the table
      */
     public void addTable(String tableName, String tableDescription) {
         addTable(tableName, tableDescription, true);
@@ -139,7 +144,7 @@ public class GATKReport {
     /**
      * Return true if table with a given name exists
      *
-     * @param tableName  the name of the table
+     * @param tableName the name of the table
      * @return true if the table exists, false otherwise
      */
     public boolean hasTable(String tableName) {
@@ -149,8 +154,8 @@ public class GATKReport {
     /**
      * Return a table with a given name
      *
-     * @param tableName  the name of the table
-     * @return  the table object
+     * @param tableName the name of the table
+     * @return the table object
      */
     public GATKReportTable getTable(String tableName) {
         GATKReportTable table = tables.get(tableName);
@@ -162,7 +167,7 @@ public class GATKReport {
     /**
      * Print all tables contained within this container to a PrintStream
      *
-     * @param out  the PrintStream to which the tables should be written
+     * @param out the PrintStream to which the tables should be written
      */
     public void print(PrintStream out) {
         for (GATKReportTable table : tables.values()) {
@@ -175,4 +180,24 @@ public class GATKReport {
     public Collection<GATKReportTable> getTables() {
         return tables.values();
     }
+
+    public void combineWith(GATKReport input) {
+
+        // For every input table, add values
+        System.out.println("This.tables: keySet");
+        for (String s : tables.keySet())
+            System.out.println(s);
+
+        // todo test tables exist
+
+
+        for (String tableName : input.tables.keySet()) {
+            System.out.println("Input table key: " + tableName);
+            if (tables.containsKey(tableName))
+                tables.get(tableName).mergeRows(input.getTable(tableName));
+            else
+                throw new ReviewedStingException("Failed to combine GATKReport, tables don't match!");
+        }
+
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java
new file mode 100644
index 000000000..0d15971ae
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java
@@ -0,0 +1,46 @@
+package org.broadinstitute.sting.gatk.report;
+
+import org.broadinstitute.sting.commandline.Gatherer;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.util.List;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: roger
+ * Date: 1/9/12
+ * Time: 11:17 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public class GATKReportGatherer extends Gatherer {
+    @Override
+    public void gather(List<File> inputs, File output) {
+        //Combines inputs GATKReport to one output
+
+        PrintStream o;
+        try {
+            o = new PrintStream(output);
+        } catch (FileNotFoundException e) {
+            throw new UserException("File to be output by CoverageByRG Gather function was not found");
+        }
+
+        GATKReport current = new GATKReport();
+        boolean isFirst = true;
+        for (File input : inputs) {
+
+            // If the table is empty
+            if (isFirst) {
+                current = new GATKReport(input);
+                isFirst = false;
+            } else {
+                GATKReport toAdd = new GATKReport(input);
+                current.combineWith(toAdd);
+            }
+        }
+
+        current.print(o);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
index b72b20e0b..ac18891d7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
@@ -4,7 +4,10 @@ import org.apache.commons.lang.ObjectUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.io.PrintStream;
-import java.util.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -12,12 +15,12 @@ import java.util.regex.Pattern;
  * A data structure that allows data to be collected over the course of a walker's computation, then have that data
  * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the
  * GATKReport loader module).
- *
+ * <p/>
  * The goal of this object is to use the same data structure for both accumulating data during a walker's computation
  * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of
  * results).  Thus, all of the infrastructure below is designed simply to make printing the following as easy as
  * possible:
- *
+ * <p/>
  * ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads
  * cycle  errorrate.61PA8.7         qualavg.61PA8.7
  * 0      0.007451835696110506      25.474613284804366
@@ -29,60 +32,60 @@ import java.util.regex.Pattern;
  * 6      5.452562704471102E-4      36.1217248908297
  * 7      5.452562704471102E-4      36.1910480349345
  * 8      5.452562704471102E-4      36.00345705967977
- *
+ * <p/>
  * Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data.  Every single
  * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed
  * together, which makes it very easy to pull tables from different programs into R via a single file.
- *
+ * <p/>
  * ------------
  * Definitions:
- *
+ * <p/>
  * Table info:
- *   The first line, structured as
- *     ##:<report version> <table name> : <table description>
- *
+ * The first line, structured as
+ * ##:<report version> <table name> : <table description>
+ * <p/>
  * Table header:
- *   The second line, specifying a unique name for each column in the table.
- *
- *   The first column mentioned in the table header is the "primary key" column - a column that provides the unique
- *   identifier for each row in the table.  Once this column is created, any element in the table can be referenced by
- *   the row-column coordinate, i.e. "primary key"-"column name" coordinate.
- *
- *   When a column is added to a table, a default value must be specified (usually 0).  This is the initial value for
- *   an element in a column.  This permits operations like increment() and decrement() to work properly on columns that
- *   are effectively counters for a particular event.
- *
- *   Finally, the display property for each column can be set during column creation.  This is useful when a given
- *   column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
- *   In these cases, it's obviously necessary to store the value required for further computation, but it's not
- *   necessary to actually print the intermediate column.
- *
+ * The second line, specifying a unique name for each column in the table.
+ * <p/>
+ * The first column mentioned in the table header is the "primary key" column - a column that provides the unique
+ * identifier for each row in the table.  Once this column is created, any element in the table can be referenced by
+ * the row-column coordinate, i.e. "primary key"-"column name" coordinate.
+ * <p/>
+ * When a column is added to a table, a default value must be specified (usually 0).  This is the initial value for
+ * an element in a column.  This permits operations like increment() and decrement() to work properly on columns that
+ * are effectively counters for a particular event.
+ * <p/>
+ * Finally, the display property for each column can be set during column creation.  This is useful when a given
+ * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
+ * In these cases, it's obviously necessary to store the value required for further computation, but it's not
+ * necessary to actually print the intermediate column.
+ * <p/>
  * Table body:
- *   The values of the table itself.
- *
+ * The values of the table itself.
+ * <p/>
  * ---------------
  * Implementation:
- *
+ * <p/>
  * The implementation of this table has two components:
- *   1. A TreeSet<Object> that stores all the values ever specified for the primary key.  Any get() operation that
- *      refers to an element where the primary key object does not exist will result in its implicit creation.  I
- *      haven't yet decided if this is a good idea...
- *
- *   2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents.  Each
- *      GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
- *      primary key and the column value.  This means that, given N columns, the primary key information is stored
- *      N+1 times.  This is obviously wasteful and can likely be handled much more elegantly in future implementations.
- *
+ * 1. A TreeSet<Object> that stores all the values ever specified for the primary key.  Any get() operation that
+ * refers to an element where the primary key object does not exist will result in its implicit creation.  I
+ * haven't yet decided if this is a good idea...
+ * <p/>
+ * 2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents.  Each
+ * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
+ * primary key and the column value.  This means that, given N columns, the primary key information is stored
+ * N+1 times.  This is obviously wasteful and can likely be handled much more elegantly in future implementations.
+ * <p/>
  * ------------------------------
  * Element and column operations:
- *
+ * <p/>
  * In addition to simply getting and setting values, this object also permits some simple operations to be applied to
  * individual elements or to whole columns.  For instance, an element can be easily incremented without the hassle of
  * calling get(), incrementing the obtained value by 1, and then calling set() with the new value.  Also, some vector
  * operations are supported.  For instance, two whole columns can be divided and have the result be set to a third
  * column.  This is especially useful when aggregating counts in two intermediate columns that will eventually need to
  * be manipulated row-by-row to compute the final column.
- *
+ * <p/>
  * Note: I've made no attempt whatsoever to make these operations efficient.  Right now, some of the methods check the
  * type of the stored object using an instanceof call and attempt to do the right thing.  Others cast the contents of
  * the cell to a Number, call the Number.toDouble() method and compute a result.  This is clearly not the ideal design,
@@ -92,7 +95,9 @@ import java.util.regex.Pattern;
  * @author Khalid Shakir
  */
 public class GATKReportTable {
-    /** REGEX that matches any table with an invalid name */
+    /**
+     * REGEX that matches any table with an invalid name
+     */
     public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
     private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
     private String tableName;
@@ -109,8 +114,8 @@ public class GATKReportTable {
     /**
      * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
      *
-     * @param name  the name of the table or column
-     * @return  true if the name is valid, false if otherwise
+     * @param name the name of the table or column
+     * @return true if the name is valid, false if otherwise
      */
     private boolean isValidName(String name) {
         Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
@@ -122,8 +127,8 @@ public class GATKReportTable {
     /**
      * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
      *
-     * @param description  the name of the table or column
-     * @return  true if the name is valid, false if otherwise
+     * @param description the name of the table or column
+     * @return true if the name is valid, false if otherwise
      */
     private boolean isValidDescription(String description) {
         Pattern p = Pattern.compile("\\r|\\n");
@@ -135,15 +140,15 @@ public class GATKReportTable {
     /**
      * Construct a new GATK report table with the specified name and description
      *
-     * @param tableName  the name of the table
-     * @param tableDescription  the description of the table
+     * @param tableName        the name of the table
+     * @param tableDescription the description of the table
      */
     public GATKReportTable(String tableName, String tableDescription) {
         this(tableName, tableDescription, true);
     }
 
     public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
-         if (!isValidName(tableName)) {
+        if (!isValidName(tableName)) {
             throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'.  GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
         }
 
@@ -169,7 +174,7 @@ public class GATKReportTable {
     /**
      * Add a primary key column.  This becomes the unique identifier for every column in the table.
      *
-     * @param primaryKeyName  the name of the primary key column
+     * @param primaryKeyName the name of the primary key column
      */
     public void addPrimaryKey(String primaryKeyName) {
         addPrimaryKey(primaryKeyName, true);
@@ -178,8 +183,8 @@ public class GATKReportTable {
     /**
      * Add an optionally visible primary key column.  This becomes the unique identifier for every column in the table, and will always be printed as the first column.
      *
-     * @param primaryKeyName  the name of the primary key column
-     * @param display should this primary key be displayed?
+     * @param primaryKeyName the name of the primary key column
+     * @param display        should this primary key be displayed?
      */
     public void addPrimaryKey(String primaryKeyName, boolean display) {
         if (!isValidName(primaryKeyName)) {
@@ -195,6 +200,7 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
+     *
      * @param dottedColumnValues Period concatenated values.
      * @return The first primary key matching the column values or throws an exception.
      */
@@ -208,6 +214,7 @@ public class GATKReportTable {
     /**
      * Returns true if there is at least on row with the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
+     *
      * @param dottedColumnValues Period concatenated values.
      * @return true if there is at least one row matching the columns.
      */
@@ -218,6 +225,7 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
+     *
      * @param dottedColumnValues Period concatenated values.
      * @return The first primary key matching the column values or null.
      */
@@ -228,6 +236,7 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the column values.
      * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
+     *
      * @param columnValues column values.
      * @return The first primary key matching the column values.
      */
@@ -235,7 +244,7 @@ public class GATKReportTable {
         for (Object primaryKey : primaryKeyColumn) {
             boolean matching = true;
             for (int i = 0; matching && i < columnValues.length; i++) {
-                matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
+                matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1));
             }
             if (matching)
                 return primaryKey;
@@ -246,8 +255,8 @@ public class GATKReportTable {
     /**
      * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
      *
-     * @param columnName  the name of the column
-     * @param defaultValue  the default value for the column
+     * @param columnName   the name of the column
+     * @param defaultValue the default value for the column
      */
     public void addColumn(String columnName, Object defaultValue) {
         addColumn(columnName, defaultValue, null);
@@ -256,12 +265,13 @@ public class GATKReportTable {
     public void addColumn(String columnName, Object defaultValue, String format) {
         addColumn(columnName, defaultValue, true, format);
     }
+
     /**
      * Add a column to the report, specify the default column value, and specify whether the column should be displayed in the final output (useful when intermediate columns are necessary for later calculations, but are not required to be in the output file.
      *
-     * @param columnName  the name of the column
-     * @param defaultValue  the default value of the column
-     * @param display  if true - the column will be displayed; if false - the column will be hidden
+     * @param columnName   the name of the column
+     * @param defaultValue the default value of the column
+     * @param display      if true - the column will be displayed; if false - the column will be hidden
      */
     public void addColumn(String columnName, Object defaultValue, boolean display) {
         addColumn(columnName, defaultValue, display, null);
@@ -277,8 +287,8 @@ public class GATKReportTable {
     /**
      * Check if the requested element exists, and if not, create it.
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
      */
     private void verifyEntry(Object primaryKey, String columnName) {
         if (!columns.containsKey(columnName)) {
@@ -299,9 +309,9 @@ public class GATKReportTable {
     /**
      * Set the value for a given position in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
-     * @param value  the value to set
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
+     * @param value      the value to set
      */
     public void set(Object primaryKey, String columnName, Object value) {
         verifyEntry(primaryKey, columnName);
@@ -312,13 +322,13 @@ public class GATKReportTable {
     /**
      * Get a value from the given position in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
-     * @return  the value stored at the specified position in the table
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
+     * @return the value stored at the specified position in the table
      */
     public Object get(Object primaryKey, String columnName) {
         verifyEntry(primaryKey, columnName);
-        
+
         return columns.get(columnName).get(primaryKey);
     }
 
@@ -327,7 +337,7 @@ public class GATKReportTable {
      *
      * @param primaryKey  the primary key value
      * @param columnIndex the index of the column
-     * @return  the value stored at the specified position in the table
+     * @return the value stored at the specified position in the table
      */
     private Object get(Object primaryKey, int columnIndex) {
         return columns.getByIndex(columnIndex).get(primaryKey);
@@ -336,8 +346,8 @@ public class GATKReportTable {
     /**
      * Increment an element in the table.  This implementation is awful - a functor would probably be better.
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
      */
     public void increment(Object primaryKey, String columnName) {
         Object oldValue = get(primaryKey, columnName);
@@ -365,8 +375,8 @@ public class GATKReportTable {
     /**
      * Decrement an element in the table.  This implementation is awful - a functor would probably be better.
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
      */
     public void decrement(Object primaryKey, String columnName) {
         Object oldValue = get(primaryKey, columnName);
@@ -394,9 +404,9 @@ public class GATKReportTable {
     /**
      * Add the specified value to an element in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
-     * @param valueToAdd  the value to add
+     * @param primaryKey the primary key value
+     * @param columnName the name of the column
+     * @param valueToAdd the value to add
      */
     public void add(Object primaryKey, String columnName, Object valueToAdd) {
         Object oldValue = get(primaryKey, columnName);
@@ -424,8 +434,8 @@ public class GATKReportTable {
     /**
      * Subtract the specified value from an element in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
+     * @param primaryKey      the primary key value
+     * @param columnName      the name of the column
      * @param valueToSubtract the value to subtract
      */
     public void subtract(Object primaryKey, String columnName, Object valueToSubtract) {
@@ -454,9 +464,9 @@ public class GATKReportTable {
     /**
      * Multiply the specified value to an element in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
-     * @param valueToMultiply  the value to multiply by
+     * @param primaryKey      the primary key value
+     * @param columnName      the name of the column
+     * @param valueToMultiply the value to multiply by
      */
     public void multiply(Object primaryKey, String columnName, Object valueToMultiply) {
         Object oldValue = get(primaryKey, columnName);
@@ -484,9 +494,9 @@ public class GATKReportTable {
     /**
      * Divide the specified value from an element in the table
      *
-     * @param primaryKey  the primary key value
-     * @param columnName  the name of the column
-     * @param valueToDivide  the value to divide by
+     * @param primaryKey    the primary key value
+     * @param columnName    the name of the column
+     * @param valueToDivide the value to divide by
      */
     public void divide(Object primaryKey, String columnName, Object valueToDivide) {
         Object oldValue = get(primaryKey, columnName);
@@ -514,9 +524,9 @@ public class GATKReportTable {
     /**
      * Add two columns to each other and set the results to a third column
      *
-     * @param columnToSet  the column that should hold the results
-     * @param augend  the column that shall be the augend
-     * @param addend  the column that shall be the addend
+     * @param columnToSet the column that should hold the results
+     * @param augend      the column that shall be the augend
+     * @param addend      the column that shall be the addend
      */
     public void addColumns(String columnToSet, String augend, String addend) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -532,8 +542,8 @@ public class GATKReportTable {
     /**
      * Subtract one column from another and set the results to a third column
      *
-     * @param columnToSet  the column that should hold the results
-     * @param minuend  the column that shall be the minuend (the a in a - b)
+     * @param columnToSet the column that should hold the results
+     * @param minuend     the column that shall be the minuend (the a in a - b)
      * @param subtrahend  the column that shall be the subtrahend (the b in a - b)
      */
     public void subtractColumns(String columnToSet, String minuend, String subtrahend) {
@@ -551,8 +561,8 @@ public class GATKReportTable {
      * Multiply two columns by each other and set the results to a third column
      *
      * @param columnToSet  the column that should hold the results
-     * @param multiplier  the column that shall be the multiplier
-     * @param multiplicand  the column that shall be the multiplicand
+     * @param multiplier   the column that shall be the multiplier
+     * @param multiplicand the column that shall be the multiplicand
      */
     public void multiplyColumns(String columnToSet, String multiplier, String multiplicand) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -568,9 +578,9 @@ public class GATKReportTable {
     /**
      * Divide two columns by each other and set the results to a third column
      *
-     * @param columnToSet  the column that should hold the results
-     * @param numeratorColumn  the column that shall be the numerator
-     * @param denominatorColumn  the column that shall be the denominator
+     * @param columnToSet       the column that should hold the results
+     * @param numeratorColumn   the column that shall be the numerator
+     * @param denominatorColumn the column that shall be the denominator
      */
     public void divideColumns(String columnToSet, String numeratorColumn, String denominatorColumn) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -585,10 +595,11 @@ public class GATKReportTable {
 
     /**
      * Return the print width of the primary key column
-     * @return  the width of the primary key column
+     *
+     * @return the width of the primary key column
      */
     public int getPrimaryKeyColumnWidth() {
-        int maxWidth = primaryKeyName.length();
+        int maxWidth = getPrimaryKeyName().length();
 
         for (Object primaryKey : primaryKeyColumn) {
             int width = primaryKey.toString().length();
@@ -604,7 +615,7 @@ public class GATKReportTable {
     /**
      * Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly.
      *
-     * @param out  the PrintStream to which the table should be written
+     * @param out the PrintStream to which the table should be written
      */
     public void write(PrintStream out) {
         // Get the column widths for everything
@@ -620,13 +631,15 @@ public class GATKReportTable {
         // Emit the table header, taking into account the padding requirement if the primary key is a hidden column
         boolean needsPadding = false;
         if (primaryKeyDisplay) {
-            out.printf(primaryKeyFormat, primaryKeyName);
+            out.printf(primaryKeyFormat, getPrimaryKeyName());
             needsPadding = true;
         }
 
         for (String columnName : columns.keySet()) {
             if (columns.get(columnName).isDisplayable()) {
-                if (needsPadding) { out.printf("  "); }
+                if (needsPadding) {
+                    out.printf("  ");
+                }
                 out.printf(columnFormats.get(columnName).getNameFormat(), columnName);
 
                 needsPadding = true;
@@ -645,7 +658,9 @@ public class GATKReportTable {
 
             for (String columnName : columns.keySet()) {
                 if (columns.get(columnName).isDisplayable()) {
-                    if (needsPadding) { out.printf("  "); }
+                    if (needsPadding) {
+                        out.printf("  ");
+                    }
                     String value = columns.get(columnName).getStringValue(primaryKey);
                     out.printf(columnFormats.get(columnName).getValueFormat(), value);
 
@@ -675,4 +690,49 @@ public class GATKReportTable {
     public GATKReportColumns getColumns() {
         return columns;
     }
+
+    public void mergeRows(GATKReportTable input) {
+        /*
+         * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows
+         * TODO: Add other combining algorithms
+         */
+
+        // Make sure the columns match AND the Primary Key
+        if (input.getColumns().keySet().equals(this.getColumns().keySet()) &&
+                input.getPrimaryKeyName().equals(this.getPrimaryKeyName())) {
+            this.addRowsFrom(input);
+        } else
+            throw new ReviewedStingException("Failed to combine GATKReportTable, columns don't match!");
+    }
+
+    public void addRowsFrom(GATKReportTable input) {
+        // add column by column
+
+        // For every column
+        for (String columnKey : input.getColumns().keySet()) {
+            GATKReportColumn current = this.getColumns().get(columnKey);
+            GATKReportColumn toAdd = input.getColumns().get(columnKey);
+            // We want to take the current column and add all the values from input
+
+            // The column is a map of values <Key, Value>
+            for (Object rowKey : toAdd.keySet()) {
+                // We add every value from toAdd to the current
+                if (!current.containsKey(rowKey)) {
+                    this.set(rowKey, columnKey, toAdd.get(rowKey));
+                    System.out.printf("Putting row with PK: %s \n", rowKey);
+                } else {
+
+                    // TODO we should be able to handle combining data by adding, averaging, etc.
+                    this.set(rowKey, columnKey, toAdd.get(rowKey));
+
+                    System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);
+                }
+            }
+        }
+
+    }
+
+    public String getPrimaryKeyName() {
+        return primaryKeyName;
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
index c9b81a9d3..77ed6972d 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
@@ -49,23 +49,23 @@ public class GATKReportUnitTest extends BaseTest {
 
     @DataProvider(name = "rightAlignValues")
     public Object[][] getRightAlignValues() {
-        return new Object[][] {
-                new Object[] {null, true},
-                new Object[] {"null", true},
-                new Object[] {"NA", true},
-                new Object[] {"0", true},
-                new Object[] {"0.0", true},
-                new Object[] {"-0", true},
-                new Object[] {"-0.0", true},
-                new Object[] {String.valueOf(Long.MAX_VALUE), true},
-                new Object[] {String.valueOf(Long.MIN_VALUE), true},
-                new Object[] {String.valueOf(Float.MIN_NORMAL), true},
-                new Object[] {String.valueOf(Double.MAX_VALUE), true},
-                new Object[] {String.valueOf(Double.MIN_VALUE), true},
-                new Object[] {String.valueOf(Double.POSITIVE_INFINITY), true},
-                new Object[] {String.valueOf(Double.NEGATIVE_INFINITY), true},
-                new Object[] {String.valueOf(Double.NaN), true},
-                new Object[] {"hello", false}
+        return new Object[][]{
+                new Object[]{null, true},
+                new Object[]{"null", true},
+                new Object[]{"NA", true},
+                new Object[]{"0", true},
+                new Object[]{"0.0", true},
+                new Object[]{"-0", true},
+                new Object[]{"-0.0", true},
+                new Object[]{String.valueOf(Long.MAX_VALUE), true},
+                new Object[]{String.valueOf(Long.MIN_VALUE), true},
+                new Object[]{String.valueOf(Float.MIN_NORMAL), true},
+                new Object[]{String.valueOf(Double.MAX_VALUE), true},
+                new Object[]{String.valueOf(Double.MIN_VALUE), true},
+                new Object[]{String.valueOf(Double.POSITIVE_INFINITY), true},
+                new Object[]{String.valueOf(Double.NEGATIVE_INFINITY), true},
+                new Object[]{String.valueOf(Double.NaN), true},
+                new Object[]{"hello", false}
         };
     }
 
@@ -73,4 +73,96 @@ public class GATKReportUnitTest extends BaseTest {
     public void testIsRightAlign(String value, boolean expected) {
         Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
     }
-}
+
+    @Test
+    public void testGATKReportGatherer() {
+
+        /*
+        GATKReportTable actual1 = new GATKReportTable("TableName", "Description");
+        actual1.addPrimaryKey("key");
+        actual1.addColumn("colA", 0);
+        actual1.addColumn("colB", 0);
+        actual1.set("row1", "colA", 1);
+        actual1.set("row1", "colB", 2);
+
+        GATKReportTable actual2 = new GATKReportTable("TableName", "Description");
+        actual2.addPrimaryKey("key");
+        actual2.addColumn("colA", 0);
+        actual2.addColumn("colB", 0);
+        actual2.set("row2", "colA", 3);
+        actual2.set("row2", "colB", 4);
+
+        GATKReportTable actual3 = new GATKReportTable("TableName", "Description");
+        actual3.addPrimaryKey("key");
+        actual3.addColumn("colA", 0);
+        actual3.addColumn("colB", 0);
+        actual3.set("row3", "colA", 5);
+        actual3.set("row3", "colB", 6);
+
+        actual1.mergeRows(actual2);
+        actual1.mergeRows(actual3);
+        actual1.write(System.out);
+        */
+
+        GATKReportTable expected = new GATKReportTable("TableName", "Description");
+        expected.addPrimaryKey("key");
+        expected.addColumn("colA", 0);
+        expected.addColumn("colB", 0);
+        expected.set("row1", "colA", 1);
+        expected.set("row1", "colB", 2);
+        expected.set("row2", "colA", 3);
+        expected.set("row2", "colB", 4);
+        expected.set("row3", "colA", 5);
+        expected.set("row3", "colB", 6);
+        expected.write(System.out);
+
+        GATKReport report1, report2, report3;
+        report1 = new GATKReport();
+        report1.addTable("TableName", "Description");
+        report1.getTable("TableName").addPrimaryKey("key");
+        report1.getTable("TableName").addColumn("colA", 0);
+        report1.getTable("TableName").addColumn("colB", 0);
+        report1.getTable("TableName").set("row1", "colA", 1);
+        report1.getTable("TableName").set("row1", "colB", 2);
+
+        report2 = new GATKReport();
+        report2.addTable("TableName", "Description");
+        report2.getTable("TableName").addPrimaryKey("key");
+        report2.getTable("TableName").addColumn("colA", 0);
+        report2.getTable("TableName").addColumn("colB", 0);
+        report2.getTable("TableName").set("row2", "colA", 3);
+        report2.getTable("TableName").set("row2", "colB", 4);
+
+        report3 = new GATKReport();
+        report3.addTable("TableName", "Description");
+        report3.getTable("TableName").addPrimaryKey("key");
+        report3.getTable("TableName").addColumn("colA", 0);
+        report3.getTable("TableName").addColumn("colB", 0);
+        report3.getTable("TableName").set("row3", "colA", 5);
+        report3.getTable("TableName").set("row3", "colB", 6);
+
+        report1.combineWith(report2);
+        report1.combineWith(report3);
+
+        report1.print(System.out);
+        /*
+          File a = new File("/home/roger/tbls/a.tbl");
+          File b = new File("/home/roger/tbls/b.tbl");
+          File c = new File("/home/roger/tbls/c.tbl");
+          File out = new File("/home/roger/tbls/out.tbl");
+
+
+          List<File> FileList = new ArrayList<File>();
+          FileList.add(a);
+          FileList.add(b);
+          FileList.add(c);
+
+          GATKReportGatherer gatherer = new GATKReportGatherer();
+          gatherer.gather(FileList, out);
+          System.out.print(out);
+        */
+
+        //Assert.assertEquals(1,1);
+
+    }
+}
\ No newline at end of file

From 337819e79176a3fd2cd41283251c926ac3046ac9 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 7 Feb 2012 19:22:32 -0500
Subject: [PATCH 27/67] disabling the test while we fix it

---
 .../broadinstitute/sting/gatk/report/GATKReportUnitTest.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
index 77ed6972d..b9a89fcfe 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
@@ -30,7 +30,7 @@ import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 public class GATKReportUnitTest extends BaseTest {
-    @Test
+    @Test(enabled = false)
     public void testParse() throws Exception {
         String reportPath = validationDataLocation + "exampleGATKReport.eval";
         GATKReport report = new GATKReport(reportPath);

From 23e7f1bed9e395d0a41de659fd10d8d398dc2b82 Mon Sep 17 00:00:00 2001
From: Khalid Shakir <kshakir@broadinstitute.org>
Date: Wed, 8 Feb 2012 02:12:16 -0500
Subject: [PATCH 29/67] When an interval list specifies overlapping intervals
 merge them before scattering.

---
 .../queue/extensions/gatk/GATKIntervals.scala  |  4 +++-
 .../gatk/GATKIntervalsUnitTest.scala           | 18 ++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala
index 9e47f64a1..b0483f0bb 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala
@@ -26,7 +26,7 @@ package org.broadinstitute.sting.queue.extensions.gatk
 
 import java.io.File
 import collection.JavaConversions._
-import org.broadinstitute.sting.utils.interval.IntervalUtils
+import org.broadinstitute.sting.utils.interval.{IntervalMergingRule, IntervalUtils}
 import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
 import net.sf.samtools.SAMFileHeader
 import java.util.Collections
@@ -51,6 +51,8 @@ case class GATKIntervals(reference: File, intervals: List[String]) {
         IntervalUtils.parseIntervalArguments(parser, intervals)
     Collections.sort(parsedLocs)
     Collections.unmodifiableList(parsedLocs)
+    val mergedLocs = IntervalUtils.mergeIntervalLocations(parsedLocs, IntervalMergingRule.OVERLAPPING_ONLY)
+    Collections.unmodifiableList(mergedLocs)
   }
 
   lazy val contigs = locs.map(_.getContig).distinct.toList
diff --git a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
index 38abe24ef..5383b3716 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
@@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile
 import org.broadinstitute.sting.utils.{GenomeLocSortedSet, GenomeLocParser}
 import collection.JavaConversions._
+import org.broadinstitute.sting.utils.interval.IntervalUtils
 
 class GATKIntervalsUnitTest {
   private final lazy val hg18Reference = new File(BaseTest.hg18Reference)
@@ -57,7 +58,7 @@ class GATKIntervalsUnitTest {
 //    Assert.assertEquals(gi.getSplits(3).toList, List(1, 2, 3))
   }
 
-  @Test(timeOut = 30000)
+  @Test(timeOut = 30000L)
   def testIntervalFile() {
     var gi = new GATKIntervals(hg19Reference, List(BaseTest.hg19Intervals))
     Assert.assertEquals(gi.locs.size, 189894)
@@ -65,7 +66,7 @@ class GATKIntervalsUnitTest {
     //   for(Item item: javaConvertedScalaList)
     // This for loop is actually an O(N^2) operation as the iterator calls the
     // O(N) javaConvertedScalaList.size() for each iteration of the loop.
-    //Assert.assertEquals(gi.getSplits(gi.locs.size).size, 189894)
+    Assert.assertEquals(IntervalUtils.splitFixedIntervals(gi.locs, 189894).size(), 189894)
     Assert.assertEquals(gi.contigs.size, 24)
   }
 
@@ -84,4 +85,17 @@ class GATKIntervalsUnitTest {
     Assert.assertEquals(new GATKIntervals(hg18Reference, List("chr1", "chr2", "chr3")).contigs, List("chr1", "chr2", "chr3"))
     Assert.assertEquals(new GATKIntervals(hg18Reference, List("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, List("chr1", "chr2", "chr3"))
   }
+
+  @Test
+  def testSortAndMergeIntervals() {
+    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-10", "chr1:1-10"), Seq("chr1:1-10"))
+    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:1-11", "chr1:1-12"), Seq("chr1:1-12"))
+    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"), Seq("chr1:1-10", "chr1:11-20", "chr1:21-30"))
+    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:10-20", "chr1:21-30"), Seq("chr1:1-20", "chr1:21-30"))
+    testSortAndMergeIntervals(Seq("chr1:1-10", "chr1:21-30", "chr1:10-20"), Seq("chr1:1-20", "chr1:21-30"))
+  }
+
+  private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) {
+    Assert.assertEquals(new GATKIntervals(hg18Reference, actual.toList).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
+  }
 }

From cda1e1b2079bf0d99c77c974ac0d3cdf883e2634 Mon Sep 17 00:00:00 2001
From: Khalid Shakir <kshakir@broadinstitute.org>
Date: Wed, 8 Feb 2012 02:24:54 -0500
Subject: [PATCH 30/67] Minor manual merge update for List class to Seq
 interface usage.

---
 .../sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
index 115b2021d..b23350557 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala
@@ -89,6 +89,6 @@ class GATKIntervalsUnitTest {
   }
 
   private def testSortAndMergeIntervals(actual: Seq[String], expected: Seq[String]) {
-    Assert.assertEquals(new GATKIntervals(hg18Reference, actual.toList).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
+    Assert.assertEquals(new GATKIntervals(hg18Reference, actual).locs.toSeq, expected.map(hg18GenomeLocParser.parseGenomeLoc(_)))
   }
 }

From 5b58fe741ad532156283822ab3e4dcccfdf1738c Mon Sep 17 00:00:00 2001
From: Matt Hanna <hanna@broadinstitute.org>
Date: Fri, 3 Feb 2012 16:43:00 -0500
Subject: [PATCH 31/67] Retiring Picard customizations for async I/O and
 cleaning up parts of the code to use common Picard utilities I recently
 discovered.

Also embedded bug fix for issues reading sparse shards and did some cleanup based on comments during BAM reading code transition meetings.
---
 .../src/net/sf/samtools/BAMFileReader.java    | 762 ------------------
 .../java/src/net/sf/samtools/GATKChunk.java   |   4 +
 .../net/sf/samtools/PicardNamespaceUtils.java |  39 +
 .../net/sf/samtools/util/BAMInputStream.java  |  72 --
 .../util/BlockCompressedInputStream.java      | 483 -----------
 ...ReaderPosition.java => BAMAccessPlan.java} |  62 +-
 .../reads/BGZFBlockLoadingDispatcher.java     |   8 +-
 .../datasources/reads/BlockInputStream.java   | 202 ++---
 .../gatk/datasources/reads/BlockLoader.java   |  22 +-
 .../gatk/datasources/reads/ReadShard.java     |   2 +-
 .../gatk/datasources/reads/SAMDataSource.java |  61 +-
 11 files changed, 238 insertions(+), 1479 deletions(-)
 delete mode 100644 public/java/src/net/sf/samtools/BAMFileReader.java
 create mode 100644 public/java/src/net/sf/samtools/PicardNamespaceUtils.java
 delete mode 100644 public/java/src/net/sf/samtools/util/BAMInputStream.java
 delete mode 100755 public/java/src/net/sf/samtools/util/BlockCompressedInputStream.java
 rename public/java/src/org/broadinstitute/sting/gatk/datasources/reads/{SAMReaderPosition.java => BAMAccessPlan.java} (58%)

diff --git a/public/java/src/net/sf/samtools/BAMFileReader.java b/public/java/src/net/sf/samtools/BAMFileReader.java
deleted file mode 100644
index 5005b6265..000000000
--- a/public/java/src/net/sf/samtools/BAMFileReader.java
+++ /dev/null
@@ -1,762 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-package net.sf.samtools;
-
-
-import net.sf.samtools.util.*;
-import net.sf.samtools.SAMFileReader.ValidationStringency;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * Internal class for reading and querying BAM files.
- */
-class BAMFileReader extends SAMFileReader.ReaderImplementation {
-    // True if reading from a File rather than an InputStream
-    private boolean mIsSeekable = false;
-
-    // For converting bytes into other primitive types
-    private BinaryCodec mStream = null;
-
-    // Underlying compressed data stream.
-    private final BAMInputStream mInputStream;
-    private SAMFileHeader mFileHeader = null;
-
-    // Populated if the file is seekable and an index exists
-    private File mIndexFile;
-    private BAMIndex mIndex = null;
-    private long mFirstRecordPointer = 0;
-    private CloseableIterator<SAMRecord> mCurrentIterator = null;
-
-    // If true, all SAMRecords are fully decoded as they are read.
-    private final boolean eagerDecode;
-
-    // For error-checking.
-    private ValidationStringency mValidationStringency;
-
-    // For creating BAMRecords
-    private SAMRecordFactory samRecordFactory;
-
-    /**
-     * Use the caching index reader implementation rather than the disk-hit-per-file model.
-     */
-    private boolean mEnableIndexCaching = false;
-
-    /**
-     * Use the traditional memory-mapped implementation for BAM file indexes rather than regular I/O.
-     */
-    private boolean mEnableIndexMemoryMapping = true;
-
-    /**
-     * Add information about the origin (reader and position) to SAM records.
-     */
-    private SAMFileReader mFileReader = null;
-
-    /**
-     * Prepare to read BAM from a stream (not seekable)
-     * @param stream source of bytes.
-     * @param eagerDecode if true, decode all BAM fields as reading rather than lazily.
-     * @param validationStringency Controls how to handle invalidate reads or header lines.
-     */
-    BAMFileReader(final InputStream stream,
-                  final File indexFile,
-                  final boolean eagerDecode,
-                  final ValidationStringency validationStringency,
-                  final SAMRecordFactory factory)
-        throws IOException {
-        mIndexFile = indexFile;
-        mIsSeekable = false;
-        mInputStream = stream instanceof BAMInputStream ? (BAMInputStream)stream : new BlockCompressedInputStream(stream);
-        mStream = new BinaryCodec(new DataInputStream((InputStream)mInputStream));
-        this.eagerDecode = eagerDecode;
-        this.mValidationStringency = validationStringency;
-        this.samRecordFactory = factory;
-        readHeader(null);
-    }
-
-    /**
-     * Prepare to read BAM from a file (seekable)
-     * @param file source of bytes.
-     * @param eagerDecode if true, decode all BAM fields as reading rather than lazily.
-     * @param validationStringency Controls how to handle invalidate reads or header lines.
-     */
-    BAMFileReader(final File file,
-                  final File indexFile,
-                  final boolean eagerDecode,
-                  final ValidationStringency validationStringency,
-                  final SAMRecordFactory factory)
-        throws IOException {
-        this(new BlockCompressedInputStream(file), indexFile!=null ? indexFile : findIndexFile(file), eagerDecode, file.getAbsolutePath(), validationStringency, factory);
-        if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified()) {
-            System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() +
-                    " is older than BAM " + file.getAbsolutePath());
-        }        
-    }
-
-    BAMFileReader(final SeekableStream strm,
-                  final File indexFile,
-                  final boolean eagerDecode,
-                  final ValidationStringency validationStringency,
-                  final SAMRecordFactory factory)
-        throws IOException {
-        this(strm instanceof BAMInputStream ? (BAMInputStream)strm : new BlockCompressedInputStream(strm),
-                indexFile,
-                eagerDecode,
-                strm.getSource(),
-                validationStringency,
-                factory);
-    }
-
-    private BAMFileReader(final BAMInputStream inputStream,
-                          final File indexFile,
-                          final boolean eagerDecode,
-                          final String source,
-                          final ValidationStringency validationStringency,
-                          final SAMRecordFactory factory)
-        throws IOException {
-        mIndexFile = indexFile;
-        mIsSeekable = true;
-        mInputStream = inputStream;
-        mStream = new BinaryCodec(new DataInputStream((InputStream)inputStream));
-        this.eagerDecode = eagerDecode;
-        this.mValidationStringency = validationStringency;
-        this.samRecordFactory = factory;
-        readHeader(source);
-        mFirstRecordPointer = inputStream.getFilePointer();
-    }
-
-    /**
-     * If true, writes the source of every read into the source SAMRecords.
-     * @param enabled true to write source information into each SAMRecord.
-     */
-    void enableFileSource(final SAMFileReader reader, final boolean enabled) {
-        this.mFileReader = enabled ? reader : null;
-    }
-
-    /**
-     * If true, uses the caching version of the index reader.
-     * @param enabled true to write source information into each SAMRecord.
-     */
-    public void enableIndexCaching(final boolean enabled) {
-        if(mIndex != null)
-            throw new SAMException("Unable to turn on index caching; index file has already been loaded.");
-        this.mEnableIndexCaching = enabled;
-    }
-
-    /**
-     * If false, disable the use of memory mapping for accessing index files (default behavior is to use memory mapping).
-     * This is slower but more scalable when accessing large numbers of BAM files sequentially.
-     * @param enabled True to use memory mapping, false to use regular I/O.
-     */
-    public void enableIndexMemoryMapping(final boolean enabled) {
-        if (mIndex != null) {
-            throw new SAMException("Unable to change index memory mapping; index file has already been loaded.");
-        }
-        this.mEnableIndexMemoryMapping = enabled;
-    }
-
-    @Override void enableCrcChecking(final boolean enabled) {
-        this.mInputStream.setCheckCrcs(enabled);
-    }
-
-    @Override void setSAMRecordFactory(final SAMRecordFactory factory) { this.samRecordFactory = factory; }
-
-    /**
-     * @return true if ths is a BAM file, and has an index
-     */
-    public boolean hasIndex() {
-        return (mIndexFile != null);
-    }
-
-    /**
-     * Retrieves the index for the given file type.  Ensure that the index is of the specified type.
-     * @return An index of the given type.
-     */
-    public BAMIndex getIndex() {
-        if(mIndexFile == null)
-            throw new SAMException("No index is available for this BAM file.");
-        if(mIndex == null)
-            mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
-                                         : new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
-        return mIndex;
-    }
-
-    void close() {
-        if (mStream != null) {
-            mStream.close();
-        }
-        if (mIndex != null) {
-            mIndex.close();
-        }
-        mStream = null;
-        mFileHeader = null;
-        mIndex = null;
-    }
-
-    SAMFileHeader getFileHeader() {
-        return mFileHeader;
-    }
-
-    /**
-     * Set error-checking level for subsequent SAMRecord reads.
-     */
-    void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) {
-        this.mValidationStringency = validationStringency;
-    }
-
-    SAMFileReader.ValidationStringency getValidationStringency() {
-        return this.mValidationStringency;
-    }
-
-    /**
-     * Prepare to iterate through the SAMRecords in file order.
-     * Only a single iterator on a BAM file can be extant at a time.  If getIterator() or a query method has been called once,
-     * that iterator must be closed before getIterator() can be called again.
-     * A somewhat peculiar aspect of this method is that if the file is not seekable, a second call to
-     * getIterator() begins its iteration where the last one left off.  That is the best that can be
-     * done in that situation.
-     */
-    CloseableIterator<SAMRecord> getIterator() {
-        if (mStream == null) {
-            throw new IllegalStateException("File reader is closed");
-        }
-        if (mCurrentIterator != null) {
-            throw new IllegalStateException("Iteration in progress");
-        }
-        if (mIsSeekable) {
-            try {
-                mInputStream.seek(mFirstRecordPointer);
-            } catch (IOException exc) {
-                throw new RuntimeException(exc.getMessage(), exc);
-            }
-        }
-        mCurrentIterator = new BAMFileIterator();
-        return mCurrentIterator;
-    }
-
-    @Override
-    CloseableIterator<SAMRecord> getIterator(final SAMFileSpan chunks) {
-        if (mStream == null) {
-            throw new IllegalStateException("File reader is closed");
-        }
-        if (mCurrentIterator != null) {
-            throw new IllegalStateException("Iteration in progress");
-        }
-        if (!(chunks instanceof BAMFileSpan)) {
-            throw new IllegalStateException("BAMFileReader cannot handle this type of file span.");
-        }
-
-        // Create an iterator over the given chunk boundaries.
-        mCurrentIterator = new BAMFileIndexIterator(((BAMFileSpan)chunks).toCoordinateArray());
-        return mCurrentIterator;
-    }
-
-    /**
-     * Gets an unbounded pointer to the first record in the BAM file.  Because the reader doesn't necessarily know
-     * when the file ends, the rightmost bound of the file pointer will not end exactly where the file ends.  However,
-     * the rightmost bound is guaranteed to be after the last read in the file.
-     * @return An unbounded pointer to the first record in the BAM file.
-     */
-    @Override
-    SAMFileSpan getFilePointerSpanningReads() {
-        return new BAMFileSpan(new Chunk(mFirstRecordPointer,Long.MAX_VALUE));
-    }
-
-    /**
-     * Prepare to iterate through the SAMRecords that match the given interval.
-     * Only a single iterator on a BAMFile can be extant at a time.  The previous one must be closed
-     * before calling any of the methods that return an iterator.
-     *
-     * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
-     * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
-     * matches the specified interval.
-     *
-     * Note that this method is not necessarily efficient in terms of disk I/O.  The index does not have perfect
-     * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
-     *
-     * @param sequence Reference sequence sought.
-     * @param start Desired SAMRecords must overlap or be contained in the interval specified by start and end.
-     * A value of zero implies the start of the reference sequence.
-     * @param end A value of zero implies the end of the reference sequence.
-     * @param contained If true, the alignments for the SAMRecords must be completely contained in the interval
-     * specified by start and end.  If false, the SAMRecords need only overlap the interval.
-     * @return Iterator for the matching SAMRecords
-     */
-    CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
-        if (mStream == null) {
-            throw new IllegalStateException("File reader is closed");
-        }
-        if (mCurrentIterator != null) {
-            throw new IllegalStateException("Iteration in progress");
-        }
-        if (!mIsSeekable) {
-            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
-        }
-        mCurrentIterator = createIndexIterator(sequence, start, end, contained? QueryType.CONTAINED: QueryType.OVERLAPPING);
-        return mCurrentIterator;
-    }
-
-    /**
-     * Prepare to iterate through the SAMRecords with the given alignment start.
-     * Only a single iterator on a BAMFile can be extant at a time.  The previous one must be closed
-     * before calling any of the methods that return an iterator.
-     *
-     * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
-     * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
-     * matches the specified interval.
-     *
-     * Note that this method is not necessarily efficient in terms of disk I/O.  The index does not have perfect
-     * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
-     *
-     * @param sequence Reference sequence sought.
-     * @param start Alignment start sought.
-     * @return Iterator for the matching SAMRecords.
-     */
-    CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
-        if (mStream == null) {
-            throw new IllegalStateException("File reader is closed");
-        }
-        if (mCurrentIterator != null) {
-            throw new IllegalStateException("Iteration in progress");
-        }
-        if (!mIsSeekable) {
-            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
-        }
-        mCurrentIterator = createIndexIterator(sequence, start, -1, QueryType.STARTING_AT);
-        return mCurrentIterator;
-    }
-
-    public CloseableIterator<SAMRecord> queryUnmapped() {
-        if (mStream == null) {
-            throw new IllegalStateException("File reader is closed");
-        }
-        if (mCurrentIterator != null) {
-            throw new IllegalStateException("Iteration in progress");
-        }
-        if (!mIsSeekable) {
-            throw new UnsupportedOperationException("Cannot query stream-based BAM file");
-        }
-        try {
-            final long startOfLastLinearBin = getIndex().getStartOfLastLinearBin();
-            if (startOfLastLinearBin != -1) {
-                mInputStream.seek(startOfLastLinearBin);
-            } else {
-                // No mapped reads in file, just start at the first read in file.
-                mInputStream.seek(mFirstRecordPointer);
-            }
-            mCurrentIterator = new BAMFileIndexUnmappedIterator();
-            return mCurrentIterator;
-        } catch (IOException e) {
-            throw new RuntimeException("IOException seeking to unmapped reads", e);
-        }
-    }
-
-    /**
-     * Reads the header from the file or stream
-     * @param source Note that this is used only for reporting errors.
-     */
-    private void readHeader(final String source)
-        throws IOException {
-
-        final byte[] buffer = new byte[4];
-        mStream.readBytes(buffer);
-        if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) {
-            throw new IOException("Invalid BAM file header");
-        }
-
-        final int headerTextLength = mStream.readInt();
-        final String textHeader = mStream.readString(headerTextLength);
-        final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
-        headerCodec.setValidationStringency(mValidationStringency);
-        mFileHeader = headerCodec.decode(new StringLineReader(textHeader),
-                source);
-
-        final int sequenceCount = mStream.readInt();
-        if (mFileHeader.getSequenceDictionary().size() > 0) {
-            // It is allowed to have binary sequences but no text sequences, so only validate if both are present
-            if (sequenceCount != mFileHeader.getSequenceDictionary().size()) {
-                throw new SAMFormatException("Number of sequences in text header (" +
-                        mFileHeader.getSequenceDictionary().size() +
-                        ") != number of sequences in binary header (" + sequenceCount + ") for file " + source);
-            }
-            for (int i = 0; i < sequenceCount; i++) {
-                final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(source);
-                final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i);
-                if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
-                    throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " +
-                            source);
-                }
-                if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
-                    throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " +
-                            source);
-                }
-            }
-        } else {
-            // If only binary sequences are present, copy them into mFileHeader
-            final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
-            for (int i = 0; i < sequenceCount; i++) {
-                sequences.add(readSequenceRecord(source));
-            }
-            mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
-        }
-    }
-
-    /**
-     * Reads a single binary sequence record from the file or stream
-     * @param source Note that this is used only for reporting errors.
-     */
-    private SAMSequenceRecord readSequenceRecord(final String source) {
-        final int nameLength = mStream.readInt();
-        if (nameLength <= 1) {
-            throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source);
-        }
-        final String sequenceName = mStream.readString(nameLength - 1);
-        // Skip the null terminator
-        mStream.readByte();
-        final int sequenceLength = mStream.readInt();
-        return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength);
-    }
-
-    /**
-     * Iterator for non-indexed sequential iteration through all SAMRecords in file.
-     * Starting point of iteration is wherever current file position is when the iterator is constructed.
-     */
-    private class BAMFileIterator implements CloseableIterator<SAMRecord> {
-        private SAMRecord mNextRecord = null;
-        private final BAMRecordCodec bamRecordCodec;
-        private long samRecordIndex = 0; // Records at what position (counted in records) we are at in the file
-
-        BAMFileIterator() {
-            this(true);
-        }
-
-        /**
-         * @param advance Trick to enable subclass to do more setup before advancing
-         */
-        BAMFileIterator(final boolean advance) {
-            this.bamRecordCodec = new BAMRecordCodec(getFileHeader(), samRecordFactory);
-            this.bamRecordCodec.setInputStream(BAMFileReader.this.mStream.getInputStream());
-
-            if (advance) {
-                advance();
-            }
-        }
-
-        public void close() {
-            if (mCurrentIterator != null && this != mCurrentIterator) {
-                throw new IllegalStateException("Attempt to close non-current iterator");
-            }
-            mCurrentIterator = null;
-        }
-
-        public boolean hasNext() {
-            return (mNextRecord != null);
-        }
-
-        public SAMRecord next() {
-            final SAMRecord result = mNextRecord;
-            advance();
-            return result;
-        }
-
-        public void remove() {
-            throw new UnsupportedOperationException("Not supported: remove");
-        }
-
-        void advance() {
-            try {
-                mNextRecord = getNextRecord();
-
-                if (mNextRecord != null) {
-                    ++this.samRecordIndex;
-                    // Because some decoding is done lazily, the record needs to remember the validation stringency.
-                    mNextRecord.setValidationStringency(mValidationStringency);
-
-                    if (mValidationStringency != ValidationStringency.SILENT) {
-                        final List<SAMValidationError> validationErrors = mNextRecord.isValid();
-                        SAMUtils.processValidationErrors(validationErrors,
-                                this.samRecordIndex, BAMFileReader.this.getValidationStringency());
-                    }
-                }
-                if (eagerDecode && mNextRecord != null) {
-                    mNextRecord.eagerDecode();
-                }
-            } catch (IOException exc) {
-                throw new RuntimeException(exc.getMessage(), exc);
-            }
-        }
-
-        /**
-         * Read the next record from the input stream.
-         */
-        SAMRecord getNextRecord() throws IOException {
-            final long startCoordinate = mInputStream.getFilePointer();
-            final SAMRecord next = bamRecordCodec.decode();
-            final long stopCoordinate = mInputStream.getFilePointer();
-
-            if(mFileReader != null && next != null)
-                next.setFileSource(new SAMFileSource(mFileReader,new BAMFileSpan(new Chunk(startCoordinate,stopCoordinate))));
-
-            return next;
-        }
-
-        /**
-         * @return The record that will be return by the next call to next()
-         */
-        protected SAMRecord peek() {
-            return mNextRecord;
-        }
-    }
-
-    /**
-     * Prepare to iterate through SAMRecords matching the target interval.
-     * @param sequence Desired reference sequence.
-     * @param start 1-based start of target interval, inclusive.
-     * @param end 1-based end of target interval, inclusive.
-     * @param queryType contained, overlapping, or starting-at query.
-     */
-    private CloseableIterator<SAMRecord> createIndexIterator(final String sequence,
-                                                             final int start,
-                                                             final int end,
-                                                             final QueryType queryType) {
-        long[] filePointers = null;
-
-        // Hit the index to determine the chunk boundaries for the required data.
-        final SAMFileHeader fileHeader = getFileHeader();
-        final int referenceIndex = fileHeader.getSequenceIndex(sequence);
-        if (referenceIndex != -1) {
-            final BAMIndex fileIndex = getIndex();
-            final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, end);
-            filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null;
-        }
-
-        // Create an iterator over the above chunk boundaries.
-        final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
-
-        // Add some preprocessing filters for edge-case reads that don't fit into this
-        // query type.
-        return new BAMQueryFilteringIterator(iterator,sequence,start,end,queryType);
-    }
-
-    enum QueryType {CONTAINED, OVERLAPPING, STARTING_AT}
-
-    /**
-     * Look for BAM index file according to standard naming convention.
-     *
-     * @param dataFile BAM file name.
-     * @return Index file name, or null if not found.
-     */
-    private static File findIndexFile(final File dataFile) {
-        // If input is foo.bam, look for foo.bai
-        final String bamExtension = ".bam";
-        File indexFile;
-        final String fileName = dataFile.getName();
-        if (fileName.endsWith(bamExtension)) {
-            final String bai = fileName.substring(0, fileName.length() - bamExtension.length()) + BAMIndex.BAMIndexSuffix;
-            indexFile = new File(dataFile.getParent(), bai);
-            if (indexFile.exists()) {
-                return indexFile;
-            }
-        }
-
-        // If foo.bai doesn't exist look for foo.bam.bai
-        indexFile = new File(dataFile.getParent(), dataFile.getName() + ".bai");
-        if (indexFile.exists()) {
-            return indexFile;
-        } else {
-            return null;
-        }
-    }    
-
-    private class BAMFileIndexIterator extends BAMFileIterator {
-
-        private long[] mFilePointers = null;
-        private int mFilePointerIndex = 0;
-        private long mFilePointerLimit = -1;
-
-        /**
-         * Prepare to iterate through SAMRecords stored in the specified compressed blocks at the given offset.
-         * @param filePointers the block / offset combination, stored in chunk format.
-         */
-        BAMFileIndexIterator(final long[] filePointers) {
-            super(false);  // delay advance() until after construction
-            mFilePointers = filePointers;
-            advance();
-        }
-
-        SAMRecord getNextRecord()
-            throws IOException {
-            // Advance to next file block if necessary
-            while (mInputStream.getFilePointer() >= mFilePointerLimit) {
-                if (mFilePointers == null ||
-                        mFilePointerIndex >= mFilePointers.length) {
-                    return null;
-                }
-                final long startOffset = mFilePointers[mFilePointerIndex++];
-                final long endOffset = mFilePointers[mFilePointerIndex++];
-                mInputStream.seek(startOffset);
-                mFilePointerLimit = endOffset;
-            }
-            // Pull next record from stream
-            return super.getNextRecord();
-        }
-    }
-
-    /**
-     * A decorating iterator that filters out records that are outside the bounds of the
-     * given query parameters.
-     */
-    private class BAMQueryFilteringIterator implements CloseableIterator<SAMRecord> {
-        /**
-         * The wrapped iterator.
-         */
-        private final CloseableIterator<SAMRecord> wrappedIterator;
-
-        /**
-         * The next record to be returned.  Will be null if no such record exists.
-         */
-        private SAMRecord mNextRecord;
-
-        private final int mReferenceIndex;
-        private final int mRegionStart;
-        private final int mRegionEnd;
-        private final QueryType mQueryType;
-
-        public BAMQueryFilteringIterator(final CloseableIterator<SAMRecord> iterator,final String sequence, final int start, final int end, final QueryType queryType) {
-            this.wrappedIterator = iterator;
-            final SAMFileHeader fileHeader = getFileHeader();
-            mReferenceIndex = fileHeader.getSequenceIndex(sequence);
-            mRegionStart = start;
-            if (queryType == QueryType.STARTING_AT) {
-                mRegionEnd = mRegionStart;
-            } else {
-                mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
-            }
-            mQueryType = queryType;
-            mNextRecord = advance();
-        }
-
-        /**
-         * Returns true if a next element exists; false otherwise.
-         */
-        public boolean hasNext() {
-            return mNextRecord != null;
-        }
-
-        /**
-         * Gets the next record from the given iterator.
-         * @return The next SAM record in the iterator.
-         */
-        public SAMRecord next() {
-            if(!hasNext())
-                throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available");
-            final SAMRecord currentRead = mNextRecord;
-            mNextRecord = advance();
-            return currentRead;
-        }
-
-        /**
-         * Closes down the existing iterator.
-         */
-        public void close() {
-            if (this != mCurrentIterator) {
-                throw new IllegalStateException("Attempt to close non-current iterator");
-            }
-            mCurrentIterator = null;
-        }
-
-        /**
-         * @throws UnsupportedOperationException always.
-         */
-        public void remove() {
-            throw new UnsupportedOperationException("Not supported: remove");
-        }
-
-        SAMRecord advance() {
-            while (true) {
-                // Pull next record from stream
-                if(!wrappedIterator.hasNext())
-                    return null;
-
-                final SAMRecord record = wrappedIterator.next();
-                // If beyond the end of this reference sequence, end iteration
-                final int referenceIndex = record.getReferenceIndex();
-                if (referenceIndex != mReferenceIndex) {
-                    if (referenceIndex < 0 ||
-                        referenceIndex > mReferenceIndex) {
-                        return null;
-                    }
-                    // If before this reference sequence, continue
-                    continue;
-                }
-                if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
-                    // Quick exit to avoid expensive alignment end calculation
-                    return record;
-                }
-                final int alignmentStart = record.getAlignmentStart();
-                // If read is unmapped but has a coordinate, return it if the coordinate is within
-                // the query region, regardless of whether the mapped mate will be returned.
-                final int alignmentEnd;
-                if (mQueryType == QueryType.STARTING_AT) {
-                    alignmentEnd = -1;
-                } else {
-                    alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START?
-                            record.getAlignmentEnd(): alignmentStart);
-                }
-
-                if (alignmentStart > mRegionEnd) {
-                    // If scanned beyond target region, end iteration
-                    return null;
-                }
-                // Filter for overlap with region
-                if (mQueryType == QueryType.CONTAINED) {
-                    if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) {
-                        return record;
-                    }
-                } else if (mQueryType == QueryType.OVERLAPPING) {
-                    if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) {
-                        return record;
-                    }
-                } else {
-                    if (alignmentStart == mRegionStart) {
-                        return record;
-                    }
-                }
-            }
-        }
-    }
-
-    private class BAMFileIndexUnmappedIterator extends BAMFileIterator  {
-        private BAMFileIndexUnmappedIterator() {
-            while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
-                advance();
-            }
-        }
-    }
-
-}
diff --git a/public/java/src/net/sf/samtools/GATKChunk.java b/public/java/src/net/sf/samtools/GATKChunk.java
index 5d349e72e..e9335a86d 100644
--- a/public/java/src/net/sf/samtools/GATKChunk.java
+++ b/public/java/src/net/sf/samtools/GATKChunk.java
@@ -40,6 +40,10 @@ public class GATKChunk extends Chunk {
         super(start,stop);
     }
 
+    public GATKChunk(final long blockStart, final int blockOffsetStart, final long blockEnd, final int blockOffsetEnd) {
+        super(blockStart << 16 | blockOffsetStart,blockEnd << 16 | blockOffsetEnd);
+    }
+
     public GATKChunk(final Chunk chunk) {
         super(chunk.getChunkStart(),chunk.getChunkEnd());
     }
diff --git a/public/java/src/net/sf/samtools/PicardNamespaceUtils.java b/public/java/src/net/sf/samtools/PicardNamespaceUtils.java
new file mode 100644
index 000000000..b645f8fdc
--- /dev/null
+++ b/public/java/src/net/sf/samtools/PicardNamespaceUtils.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package net.sf.samtools;
+
+/**
+ * Utils that insist on being in the same package as Picard.
+ */
+public class PicardNamespaceUtils {
+    /**
+     * Private constructor only.  Do not instantiate.
+     */
+    private PicardNamespaceUtils() {}
+
+    public static void setFileSource(final SAMRecord read, final SAMFileSource fileSource) {
+        read.setFileSource(fileSource);
+    }
+}
diff --git a/public/java/src/net/sf/samtools/util/BAMInputStream.java b/public/java/src/net/sf/samtools/util/BAMInputStream.java
deleted file mode 100644
index d825c23d5..000000000
--- a/public/java/src/net/sf/samtools/util/BAMInputStream.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package net.sf.samtools.util;
-
-import java.io.IOException;
-
-/**
- * An input stream formulated for use reading BAM files.  Supports
- */
-public interface BAMInputStream {
-    /**
-     * Seek to the given position in the file.  Note that pos is a special virtual file pointer,
-     * not an actual byte offset.
-     *
-     * @param pos virtual file pointer
-     */
-    public void seek(final long pos) throws IOException;
-
-    /**
-     * @return virtual file pointer that can be passed to seek() to return to the current position.  This is
-     * not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
-     * the two.
-     */
-    public long getFilePointer();
-
-    /**
-     * Determines whether or not the inflater will re-calculated the CRC on the decompressed data
-     * and check it against the value stored in the GZIP header.  CRC checking is an expensive
-     * operation and should be used accordingly.
-     */
-    public void setCheckCrcs(final boolean check);
-
-    public int read() throws java.io.IOException;
-
-    public int read(byte[] bytes) throws java.io.IOException;
-
-    public int read(byte[] bytes, int i, int i1) throws java.io.IOException;
-
-    public long skip(long l) throws java.io.IOException;
-
-    public int available() throws java.io.IOException;
-
-    public void close() throws java.io.IOException;
-
-    public void mark(int i);
-
-    public void reset() throws java.io.IOException;
-
-    public boolean markSupported();
-}
diff --git a/public/java/src/net/sf/samtools/util/BlockCompressedInputStream.java b/public/java/src/net/sf/samtools/util/BlockCompressedInputStream.java
deleted file mode 100755
index fae2fc89b..000000000
--- a/public/java/src/net/sf/samtools/util/BlockCompressedInputStream.java
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package net.sf.samtools.util;
-
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.RandomAccessFile;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.util.Arrays;
-
-import net.sf.samtools.FileTruncatedException;
-
-/*
- * Utility class for reading BGZF block compressed files.  The caller can treat this file like any other InputStream.
- * It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
- * The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
- * entire file up to the location being sought.  Note that seeking is only possible if the ctor(File) is used.
- *
- * c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format
- */
-public class BlockCompressedInputStream extends InputStream implements BAMInputStream {
-    private InputStream mStream = null;
-    private SeekableStream mFile = null;
-    private byte[] mFileBuffer = null;
-    private byte[] mCurrentBlock = null;
-    private int mCurrentOffset = 0;
-    private long mBlockAddress = 0;
-    private int mLastBlockLength = 0;
-    private final BlockGunzipper blockGunzipper = new BlockGunzipper();
-
-
-    /**
-     * Note that seek() is not supported if this ctor is used.
-     */
-    public BlockCompressedInputStream(final InputStream stream) {
-        mStream = IOUtil.toBufferedStream(stream);
-        mFile = null;
-    }
-
-    /**
-     * Use this ctor if you wish to call seek()
-     */
-    public BlockCompressedInputStream(final File file)
-        throws IOException {
-        mFile = new SeekableFileStream(file);
-        mStream = null;
-
-    }
-
-    public BlockCompressedInputStream(final URL url) {
-        mFile = new SeekableBufferedStream(new SeekableHTTPStream(url));
-        mStream = null;
-    }
-
-    /**
-     * For providing some arbitrary data source.  No additional buffering is
-     * provided, so if the underlying source is not buffered, wrap it in a
-     * SeekableBufferedStream before passing to this ctor.
-     */
-    public BlockCompressedInputStream(final SeekableStream strm) {
-        mFile = strm;
-        mStream = null;
-    }
-
-    /**
-     * Determines whether or not the inflater will re-calculated the CRC on the decompressed data
-     * and check it against the value stored in the GZIP header.  CRC checking is an expensive
-     * operation and should be used accordingly.
-     */
-    public void setCheckCrcs(final boolean check) {
-        this.blockGunzipper.setCheckCrcs(check);
-    }
-
-    /**
-     * @return the number of bytes that can be read (or skipped over) from this input stream without blocking by the
-     * next caller of a method for this input stream. The next caller might be the same thread or another thread.
-     * Note that although the next caller can read this many bytes without blocking, the available() method call itself
-     * may block in order to fill an internal buffer if it has been exhausted.
-     */
-    public int available()
-        throws IOException {
-        if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.length) {
-            readBlock();
-        }
-        if (mCurrentBlock == null) {
-            return 0;
-        }
-        return mCurrentBlock.length - mCurrentOffset;
-    }
-
-    /**
-     * Closes the underlying InputStream or RandomAccessFile
-     */
-    public void close()
-        throws IOException {
-        if (mFile != null) {
-            mFile.close();
-            mFile = null;
-        } else if (mStream != null) {
-            mStream.close();
-            mStream = null;
-        }
-        // Encourage garbage collection
-        mFileBuffer = null;
-        mCurrentBlock = null;
-    }
-
-    /**
-     * Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255.
-     * If no byte is available because the end of the stream has been reached, the value -1 is returned.
-     * This method blocks until input data is available, the end of the stream is detected, or an exception is thrown.
-
-     * @return the next byte of data, or -1 if the end of the stream is reached.
-     */
-    public int read()
-        throws IOException {
-        return (available() > 0) ? mCurrentBlock[mCurrentOffset++] : -1;
-    }
-
-    /**
-     * Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
-     * actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
-     * or an exception is thrown.
-     *
-     * read(buf) has the same effect as read(buf, 0, buf.length).
-     *
-     * @param buffer the buffer into which the data is read.
-     * @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of
-     * the stream has been reached.
-     */
-    public int read(final byte[] buffer)
-        throws IOException {
-        return read(buffer, 0, buffer.length);
-    }
-
-    private volatile ByteArrayOutputStream buf = null;
-    private static final byte eol = '\n';
-    private static final byte eolCr = '\r';
-    
-    /**
-     * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'), 
-     * carriage return ('\r') or carriage return followed by a line feed ("\r\n").
-     *
-     * @return  A String containing the contents of the line, excluding the line terminating
-     *          character, or null if the end of the stream has been reached
-     *
-     * @exception  IOException  If an I/O error occurs
-     * @
-     */
-    public String readLine() throws IOException {
-    	int available = available();
-        if (available == 0) {
-            return null;
-        }
-        if(null == buf){ // lazy initialisation 
-        	buf = new ByteArrayOutputStream(8192);
-        }
-        buf.reset();
-    	boolean done = false;
-    	boolean foundCr = false; // \r found flag
-        while (!done) {
-        	int linetmpPos = mCurrentOffset;
-        	int bCnt = 0;
-        	while((available-- > 0)){
-        		final byte c = mCurrentBlock[linetmpPos++];
-        		if(c == eol){ // found \n
-        			done = true;
-        			break;
-        		} else if(foundCr){  // previous char was \r
-        			--linetmpPos; // current char is not \n so put it back
-        			done = true;
-        			break;
-        		} else if(c == eolCr){ // found \r
-					foundCr = true;
-        			continue; // no ++bCnt
-        		}
-				++bCnt;
-        	}
-        	if(mCurrentOffset < linetmpPos){
-				buf.write(mCurrentBlock, mCurrentOffset, bCnt);
-	        	mCurrentOffset = linetmpPos;
-        	}
-        	available = available();    
-        	if(available == 0){
-        		// EOF
-        		done = true;
-        	}
-        }
-    	return buf.toString();
-    }
-
-    /**
-     * Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read
-     * as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer.
-     *
-     * This method blocks until input data is available, end of file is detected, or an exception is thrown.
-     *
-     * @param buffer buffer into which data is read.
-     * @param offset the start offset in array b  at which the data is written.
-     * @param length the maximum number of bytes to read.
-     * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
-     * the stream has been reached.
-     */
-    public int read(final byte[] buffer, int offset, int length)
-        throws IOException {
-        final int originalLength = length;
-        while (length > 0) {
-            final int available = available();
-            if (available == 0) {
-                // Signal EOF to caller
-                if (originalLength == length) {
-                    return -1;
-                }
-                break;
-            }
-            final int copyLength = Math.min(length, available);
-            System.arraycopy(mCurrentBlock, mCurrentOffset, buffer, offset, copyLength);
-            mCurrentOffset += copyLength;
-            offset += copyLength;
-            length -= copyLength;
-        }
-        return originalLength - length;
-    }
-
-    /**
-     * Seek to the given position in the file.  Note that pos is a special virtual file pointer,
-     * not an actual byte offset.
-     *
-     * @param pos virtual file pointer
-     */
-    public void seek(final long pos)
-        throws IOException {
-        if (mFile == null) {
-            throw new IOException("Cannot seek on stream based file");
-        }
-        // Decode virtual file pointer
-        // Upper 48 bits is the byte offset into the compressed stream of a block.
-        // Lower 16 bits is the byte offset into the uncompressed stream inside the block.
-        final long compressedOffset = BlockCompressedFilePointerUtil.getBlockAddress(pos);
-        final int uncompressedOffset = BlockCompressedFilePointerUtil.getBlockOffset(pos);
-        final int available;
-        if (mBlockAddress == compressedOffset && mCurrentBlock != null) {
-            available = mCurrentBlock.length;
-        } else {
-            mFile.seek(compressedOffset);
-            mBlockAddress = compressedOffset;
-            mLastBlockLength = 0;
-            readBlock();
-            available = available();
-        }
-        if (uncompressedOffset > available ||
-                (uncompressedOffset == available && !eof())) {
-            throw new IOException("Invalid file pointer: " + pos);
-        }
-        mCurrentOffset = uncompressedOffset;
-    }
-
-    private boolean eof() throws IOException {
-        if (mFile.eof()) {
-            return true;
-        }
-        // If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF.
-        return (mFile.length() - (mBlockAddress + mLastBlockLength) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
-    }
-
-    /**
-     * @return virtual file pointer that can be passed to seek() to return to the current position.  This is
-     * not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
-     * the two.
-     */
-    public long getFilePointer() {
-        if (mCurrentOffset == mCurrentBlock.length) {
-            // If current offset is at the end of the current block, file pointer should point
-            // to the beginning of the next block.
-            return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress + mLastBlockLength, 0);
-        }
-        return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, mCurrentOffset);
-    }
-
-    public static long getFileBlock(final long bgzfOffset) {
-        return BlockCompressedFilePointerUtil.getBlockAddress(bgzfOffset);
-    }
-    
-    /**
-     * @param stream Must be at start of file.  Throws RuntimeException if !stream.markSupported().
-     * @return true if the given file looks like a valid BGZF file.
-     */
-    public static boolean isValidFile(final InputStream stream)
-        throws IOException {
-        if (!stream.markSupported()) {
-            throw new RuntimeException("Cannot test non-buffered stream");
-        }
-        stream.mark(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
-        final byte[] buffer = new byte[BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH];
-        final int count = readBytes(stream, buffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
-        stream.reset();
-        return count == BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH && isValidBlockHeader(buffer);
-    }
-
-    private static boolean isValidBlockHeader(final byte[] buffer) {
-        return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 &&
-                (buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 &&
-                (buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 &&
-                buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN &&
-                buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 &&
-                buffer[13] == BlockCompressedStreamConstants.BGZF_ID2);
-    }
-
-    private void readBlock()
-        throws IOException {
-
-        if (mFileBuffer == null) {
-            mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
-        }
-        int count = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
-        if (count == 0) {
-            // Handle case where there is no empty gzip block at end.
-            mCurrentOffset = 0;
-            mBlockAddress += mLastBlockLength;
-            mCurrentBlock = new byte[0];
-            return;
-        }
-        if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) {
-            throw new IOException("Premature end of file");
-        }
-        final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
-        if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
-            throw new IOException("Unexpected compressed block length: " + blockLength);
-        }
-        final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH;
-        count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining);
-        if (count != remaining) {
-            throw new FileTruncatedException("Premature end of file");
-        }
-        inflateBlock(mFileBuffer, blockLength);
-        mCurrentOffset = 0;
-        mBlockAddress += mLastBlockLength;
-        mLastBlockLength = blockLength;
-    }
-
-    private void inflateBlock(final byte[] compressedBlock, final int compressedLength)
-        throws IOException {
-        final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4);
-        byte[] buffer = mCurrentBlock;
-        mCurrentBlock = null;
-        if (buffer == null || buffer.length != uncompressedLength) {
-            try {
-                buffer = new byte[uncompressedLength];
-            } catch (NegativeArraySizeException e) {
-                throw new RuntimeException("BGZF file has invalid uncompressedLength: " + uncompressedLength, e);
-            }
-        }
-        blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
-        mCurrentBlock = buffer;
-    }
-
-    private int readBytes(final byte[] buffer, final int offset, final int length)
-        throws IOException {
-        if (mFile != null) {
-            return readBytes(mFile, buffer, offset, length);
-        } else if (mStream != null) {
-            return readBytes(mStream, buffer, offset, length);
-        } else {
-            return 0;
-        }
-    }
-
-    private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length)
-        throws IOException {
-        int bytesRead = 0;
-        while (bytesRead < length) {
-            final int count = file.read(buffer, offset + bytesRead, length - bytesRead);
-            if (count <= 0) {
-                break;
-            }
-            bytesRead += count;
-        }
-        return bytesRead;
-    }
-
-    private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
-        throws IOException {
-        int bytesRead = 0;
-        while (bytesRead < length) {
-            final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
-            if (count <= 0) {
-                break;
-            }
-            bytesRead += count;
-        }
-        return bytesRead;
-    }
-
-    private int unpackInt16(final byte[] buffer, final int offset) {
-        return ((buffer[offset] & 0xFF) |
-                ((buffer[offset+1] & 0xFF) << 8));
-    }
-
-    private int unpackInt32(final byte[] buffer, final int offset) {
-        return ((buffer[offset] & 0xFF) |
-                ((buffer[offset+1] & 0xFF) << 8) |
-                ((buffer[offset+2] & 0xFF) << 16) |
-                ((buffer[offset+3] & 0xFF) << 24));
-    }
-
-    public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE}
-
-    public static FileTermination checkTermination(final File file)
-        throws IOException {
-        final long fileSize = file.length();
-        if (fileSize < BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length) {
-            return FileTermination.DEFECTIVE;
-        }
-        final RandomAccessFile raFile = new RandomAccessFile(file, "r");
-        try {
-            raFile.seek(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
-            byte[] buf = new byte[BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length];
-            raFile.readFully(buf);
-            if (Arrays.equals(buf, BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) {
-                return FileTermination.HAS_TERMINATOR_BLOCK;
-            }
-            final int bufsize = (int)Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
-            buf = new byte[bufsize];
-            raFile.seek(fileSize - bufsize);
-            raFile.read(buf);
-            for (int i = buf.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length;
-                    i >= 0; --i) {
-                if (!preambleEqual(BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE,
-                        buf, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) {
-                    continue;
-                }
-                final ByteBuffer byteBuffer = ByteBuffer.wrap(buf, i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, 4);
-                byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
-                final int totalBlockSizeMinusOne =  byteBuffer.getShort() & 0xFFFF;
-                if (buf.length - i == totalBlockSizeMinusOne + 1) {
-                    return FileTermination.HAS_HEALTHY_LAST_BLOCK;
-                } else {
-                    return FileTermination.DEFECTIVE;
-                }
-            }
-            return FileTermination.DEFECTIVE;
-        } finally {
-            raFile.close();
-        }
-    }
-
-    private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
-        for (int i = 0; i < length; ++i) {
-            if (preamble[i] != buf[i + startOffset]) {
-                return false;
-            }
-        }
-        return true;
-    }
-}
-
-
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMAccessPlan.java
similarity index 58%
rename from public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
rename to public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMAccessPlan.java
index 0a6173c1e..164971365 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMAccessPlan.java
@@ -27,8 +27,10 @@ package org.broadinstitute.sting.gatk.datasources.reads;
 import net.sf.picard.util.PeekableIterator;
 import net.sf.samtools.GATKBAMFileSpan;
 import net.sf.samtools.GATKChunk;
+import net.sf.samtools.util.BlockCompressedFilePointerUtil;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
+import java.util.LinkedList;
 import java.util.List;
 
 /**
@@ -38,7 +40,7 @@ import java.util.List;
 * Time: 10:47 PM
 * To change this template use File | Settings | File Templates.
 */
-class SAMReaderPosition {
+class BAMAccessPlan {
     private final SAMReaderID reader;
     private final BlockInputStream inputStream;
 
@@ -51,7 +53,7 @@ class SAMReaderPosition {
     private long nextBlockAddress;
 
 
-    SAMReaderPosition(final SAMReaderID reader, final BlockInputStream inputStream, GATKBAMFileSpan fileSpan) {
+    BAMAccessPlan(final SAMReaderID reader, final BlockInputStream inputStream, GATKBAMFileSpan fileSpan) {
         this.reader = reader;
         this.inputStream = inputStream;
 
@@ -84,11 +86,45 @@ class SAMReaderPosition {
     }
 
     /**
-     * Retrieves the last offset of interest in the block returned by getBlockAddress().
-     * @return First block of interest in this segment.
+     * Gets the spans overlapping the given block; used to copy the contents of the block into the circular buffer.
+     * @param blockAddress Block address for which to search.
+     * @param filePosition Block address at which to terminate the last chunk if the last chunk goes beyond this span.
+     * @return list of chunks containing that block.
      */
-    public int getLastOffsetInBlock() {
-        return (nextBlockAddress == positionIterator.peek().getBlockEnd()) ? positionIterator.peek().getBlockOffsetEnd() : 65536;
+    public List<GATKChunk> getSpansOverlappingBlock(long blockAddress, long filePosition) {
+        List<GATKChunk> spansOverlapping = new LinkedList<GATKChunk>();
+        // While the position iterator overlaps the given block, pull out spans to report.
+        while(positionIterator.hasNext() && positionIterator.peek().getBlockStart() <= blockAddress) {
+            // Create a span over as much of the block as is covered by this chunk.
+            int blockOffsetStart = (blockAddress == positionIterator.peek().getBlockStart()) ? positionIterator.peek().getBlockOffsetStart() : 0;
+
+            // Calculate the end of this span.  If the span extends past this block, cap it using the current file position.
+            long blockEnd;
+            int blockOffsetEnd;
+            if(blockAddress < positionIterator.peek().getBlockEnd()) {
+                blockEnd = filePosition;
+                blockOffsetEnd = 0;
+            }
+            else {
+                blockEnd = positionIterator.peek().getBlockEnd();
+                blockOffsetEnd = positionIterator.peek().getBlockOffsetEnd();
+            }
+
+            GATKChunk newChunk = new GATKChunk(blockAddress,blockOffsetStart,blockEnd,blockOffsetEnd);
+
+            if(newChunk.getChunkStart() <= newChunk.getChunkEnd())
+                spansOverlapping.add(new GATKChunk(blockAddress,blockOffsetStart,blockEnd,blockOffsetEnd));
+
+            // If the value currently stored in the position iterator ends past the current block, we must be done.  Abort.
+            if(!positionIterator.hasNext() ||  positionIterator.peek().getBlockEnd() > blockAddress)
+                break;
+
+            // If the position iterator ends before the block ends, pull the position iterator forward.
+            if(positionIterator.peek().getBlockEnd() <= blockAddress)
+                positionIterator.next();
+        }
+
+        return spansOverlapping;
     }
 
     public void reset() {
@@ -111,20 +147,16 @@ class SAMReaderPosition {
      * @param filePosition The current position within the file.
      */
     void advancePosition(final long filePosition) {
-        nextBlockAddress = filePosition >> 16;
+        nextBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(filePosition);
 
         // Check the current file position against the iterator; if the iterator is before the current file position,
         // draw the iterator forward.  Remember when performing the check that coordinates are half-open!
-        while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
+        while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek()))
             positionIterator.next();
 
-            // If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block.
-            if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart()) {
-                nextBlockAddress = positionIterator.peek().getBlockStart();
-                //System.out.printf("SAMReaderPosition: next block address advanced to %d%n",nextBlockAddress);
-                break;
-            }
-        }
+        // If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block.
+        if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart())
+            nextBlockAddress = positionIterator.peek().getBlockStart();
 
         // If we've shot off the end of the block pointer, notify consumers that iteration is complete.
         if(!positionIterator.hasNext())
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BGZFBlockLoadingDispatcher.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BGZFBlockLoadingDispatcher.java
index f468d2020..d75e91bf3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BGZFBlockLoadingDispatcher.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BGZFBlockLoadingDispatcher.java
@@ -44,12 +44,12 @@ public class BGZFBlockLoadingDispatcher {
 
     private final ExecutorService threadPool;
 
-    private final Queue<SAMReaderPosition> inputQueue;
+    private final Queue<BAMAccessPlan> inputQueue;
 
     public BGZFBlockLoadingDispatcher(final int numThreads, final int numFileHandles) {
         threadPool = Executors.newFixedThreadPool(numThreads);
         fileHandleCache = new FileHandleCache(numFileHandles);
-        inputQueue = new LinkedList<SAMReaderPosition>();
+        inputQueue = new LinkedList<BAMAccessPlan>();
 
         threadPool.execute(new BlockLoader(this,fileHandleCache,true));
     }
@@ -58,7 +58,7 @@ public class BGZFBlockLoadingDispatcher {
      * Initiates a request for a new block load.
       * @param readerPosition Position at which to load.
      */
-    void queueBlockLoad(final SAMReaderPosition readerPosition) {
+    void queueBlockLoad(final BAMAccessPlan readerPosition) {
         synchronized(inputQueue) {
             inputQueue.add(readerPosition);
             inputQueue.notify();
@@ -69,7 +69,7 @@ public class BGZFBlockLoadingDispatcher {
      * Claims the next work request from the queue.
      * @return The next work request, or null if none is available.
      */
-    SAMReaderPosition claimNextWorkRequest() {
+    BAMAccessPlan claimNextWorkRequest() {
         synchronized(inputQueue) {
             while(inputQueue.isEmpty()) {
                 try {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
index cb37bad31..fda5d818c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
@@ -26,24 +26,21 @@ package org.broadinstitute.sting.gatk.datasources.reads;
 
 import net.sf.samtools.GATKBAMFileSpan;
 import net.sf.samtools.GATKChunk;
-import net.sf.samtools.util.BAMInputStream;
-import net.sf.samtools.util.BlockCompressedFilePointerUtil;
 import net.sf.samtools.util.BlockCompressedInputStream;
-import net.sf.samtools.util.RuntimeEOFException;
-import net.sf.samtools.util.SeekableStream;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.util.Arrays;
-import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.List;
 
 /**
  * Presents decompressed blocks to the SAMFileReader.
  */
-public class BlockInputStream extends SeekableStream implements BAMInputStream {
+public class BlockInputStream extends InputStream {
     /**
      * Mechanism for triggering block loads.
      */
@@ -65,9 +62,9 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
     private Throwable error;
 
     /**
-     * Current position.
+     * Current accessPlan.
      */
-    private SAMReaderPosition position;
+    private BAMAccessPlan accessPlan;
 
     /**
      * A stream of compressed data blocks.
@@ -94,11 +91,6 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
      */
     private final BlockCompressedInputStream validatingInputStream;
 
-    /**
-     * Has the buffer been filled since last request?
-     */
-    private boolean bufferFilled = false;
-
     /**
      * Create a new block presenting input stream with a dedicated buffer.
      * @param dispatcher the block loading messenger.
@@ -118,7 +110,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
 
         this.dispatcher = dispatcher;
         // TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
-        this.position = new SAMReaderPosition(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
+        this.accessPlan = new BAMAccessPlan(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
 
         // The block offsets / block positions guarantee that the ending offset/position in the data structure maps to
         // the point in the file just following the last read.  These two arrays should never be empty; initializing
@@ -151,7 +143,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
         synchronized(lock) {
             // Find the current block within the input stream.
             int blockIndex;
-            for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() >= blockOffsets.get(blockIndex + 1); blockIndex++)
+            for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() > blockOffsets.get(blockIndex+1); blockIndex++)
                 ;
             filePointer = blockPositions.get(blockIndex) + (buffer.position()-blockOffsets.get(blockIndex));
         }
@@ -164,51 +156,8 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
         return filePointer;
     }
 
-    public void seek(long target) {
-        //System.out.printf("Thread %s, BlockInputStream %s: seeking to block %d, offset %d%n",Thread.currentThread().getId(),this,BlockCompressedFilePointerUtil.getBlockAddress(target),BlockCompressedFilePointerUtil.getBlockOffset(target));
-        synchronized(lock) {
-            clearBuffers();
-
-            // Ensure that the position filled in by submitAccessPlan() is in sync with the seek target just specified.
-            position.advancePosition(target);
-
-            // If the position advances past the end of the target, that must mean that we seeked to a point at the end
-            // of one of the chunk list's subregions.  Make a note of our current position and punt on loading any data.
-            if(target < position.getBlockAddress() << 16) {
-                blockOffsets.clear();
-                blockOffsets.add(0);
-                blockPositions.clear();
-                blockPositions.add(target);
-            }
-            else {
-                waitForBufferFill();
-                // A buffer fill will load the relevant data from the shard, but the buffer position still needs to be
-                // advanced as appropriate.
-                Iterator<Integer> blockOffsetIterator = blockOffsets.descendingIterator();
-                Iterator<Long> blockPositionIterator = blockPositions.descendingIterator();
-                while(blockOffsetIterator.hasNext() && blockPositionIterator.hasNext()) {
-                    final int blockOffset = blockOffsetIterator.next();
-                    final long blockPosition = blockPositionIterator.next();
-                    if((blockPosition >> 16) == (target >> 16) && (blockPosition&0xFFFF) < (target&0xFFFF)) {
-                        buffer.position(blockOffset + (int)(target&0xFFFF)-(int)(blockPosition&0xFFFF));
-                        break;
-                    }
-                }
-            }
-
-            if(validatingInputStream != null) {
-                try {
-                    validatingInputStream.seek(target);
-                }
-                catch(IOException ex) {
-                    throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
-                }
-            }
-        }
-    }
-
     private void clearBuffers() {
-        this.position.reset();
+        this.accessPlan.reset();
 
         // Buffer semantics say that outside of a lock, buffer should always be prepared for reading.
         // Indicate no data to be read.
@@ -225,29 +174,41 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
     public boolean eof() {
         synchronized(lock) {
             // TODO: Handle multiple empty BGZF blocks at end of the file.
-            return position != null && (position.getBlockAddress() < 0 || position.getBlockAddress() >= length);
+            return accessPlan != null && (accessPlan.getBlockAddress() < 0 || accessPlan.getBlockAddress() >= length);
         }
     }
 
-    public void setCheckCrcs(final boolean check) {
-        // TODO: Implement
-    }
-
     /**
-     * Submits a new access plan for the given dataset.
-     * @param position The next seek point for BAM data in this reader.
+     * Submits a new access plan for the given dataset and seeks to the given point.
+     * @param accessPlan The next seek point for BAM data in this reader.
      */
-    public void submitAccessPlan(final SAMReaderPosition position) {
+    public void submitAccessPlan(final BAMAccessPlan accessPlan) {
         //System.out.printf("Thread %s: submitting access plan for block at position: %d%n",Thread.currentThread().getId(),position.getBlockAddress());
-        synchronized(lock) {
-            // Assume that the access plan is going to tell us to start where we are and move forward.
-            // If this isn't the case, we'll soon receive a seek request and the buffer will be forced to reset.
-            if(this.position != null && position.getBlockAddress() < this.position.getBlockAddress())
-                position.advancePosition(this.position.getBlockAddress() << 16);
+        this.accessPlan = accessPlan;
+        accessPlan.reset();
+
+        clearBuffers();
+
+        // Pull the iterator past any oddball chunks at the beginning of the shard (chunkEnd < chunkStart, empty chunks, etc).
+        // TODO: Don't pass these empty chunks in.
+        accessPlan.advancePosition(makeFilePointer(accessPlan.getBlockAddress(),0));
+
+        if(accessPlan.getBlockAddress() >= 0) {
+            waitForBufferFill();
         }
-        this.position = position;
+
+        if(validatingInputStream != null) {
+            try {
+                validatingInputStream.seek(makeFilePointer(accessPlan.getBlockAddress(),0));
+            }
+            catch(IOException ex) {
+                throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
+            }
+        }
+
     }
 
+
     private void compactBuffer() {
         // Compact buffer to maximize storage space.
         int bytesToRemove = 0;
@@ -286,27 +247,14 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
      * Push contents of incomingBuffer into the end of this buffer.
      * MUST be called from a thread that is NOT the reader thread.
      * @param incomingBuffer The data being pushed into this input stream.
-     * @param position target position for the data.
+     * @param accessPlan target access plan for the data.
      * @param filePosition the current position of the file pointer
      */
-    public void copyIntoBuffer(final ByteBuffer incomingBuffer, final SAMReaderPosition position, final long filePosition) {
+    public void copyIntoBuffer(final ByteBuffer incomingBuffer, final BAMAccessPlan accessPlan, final long filePosition) {
         synchronized(lock) {
             try {
-                compactBuffer();
-                // Open up the buffer for more reading.
-                buffer.limit(buffer.capacity());
-
-                // Advance the position to take the most recent read into account.
-                final long lastBlockAddress = position.getBlockAddress();
-                final int blockOffsetStart = position.getFirstOffsetInBlock();
-                final int blockOffsetEnd = position.getLastOffsetInBlock();
-
-                // Where did this read end?  It either ended in the middle of a block (for a bounding chunk) or it ended at the start of the next block.
-                final long endOfRead = (blockOffsetEnd < incomingBuffer.remaining()) ? (lastBlockAddress << 16) | blockOffsetEnd : filePosition << 16;
-
-                byte[] validBytes = null;
                 if(validatingInputStream != null) {
-                    validBytes = new byte[incomingBuffer.remaining()];
+                    byte[] validBytes = new byte[incomingBuffer.remaining()];
 
                     byte[] currentBytes = new byte[incomingBuffer.remaining()];
                     int pos = incomingBuffer.position();
@@ -317,7 +265,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
                     incomingBuffer.position(pos);
 
                     long currentFilePointer = validatingInputStream.getFilePointer();
-                    validatingInputStream.seek(lastBlockAddress << 16);
+                    validatingInputStream.seek(makeFilePointer(accessPlan.getBlockAddress(), 0));
                     validatingInputStream.read(validBytes);
                     validatingInputStream.seek(currentFilePointer);
 
@@ -325,33 +273,41 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
                         throw new ReviewedStingException(String.format("Bytes being inserted into BlockInputStream %s are incorrect",this));
                 }
 
-                this.position = position;
-                position.advancePosition(filePosition << 16);
+                compactBuffer();
+                // Open up the buffer for more reading.
+                buffer.limit(buffer.capacity());
 
-                if(buffer.remaining() < incomingBuffer.remaining()) {
-                    //System.out.printf("Thread %s: waiting for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n",Thread.currentThread().getId(),buffer.remaining(),incomingBuffer.remaining());
+                // Get the spans overlapping this particular block...
+                List<GATKChunk> spansOverlapping = accessPlan.getSpansOverlappingBlock(accessPlan.getBlockAddress(),filePosition);
+
+                // ...and advance the block
+                this.accessPlan = accessPlan;
+                accessPlan.advancePosition(makeFilePointer(filePosition, 0));
+
+                if(buffer.remaining() < incomingBuffer.remaining())
                     lock.wait();
-                    //System.out.printf("Thread %s: waited for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n", Thread.currentThread().getId(), buffer.remaining(), incomingBuffer.remaining());
+
+                final int bytesInIncomingBuffer = incomingBuffer.limit();
+
+                for(GATKChunk spanOverlapping: spansOverlapping) {
+                    // Clear out the endcap tracking state and add in the starting position for this transfer.
+                    blockOffsets.removeLast();
+                    blockOffsets.add(buffer.position());
+                    blockPositions.removeLast();
+                    blockPositions.add(spanOverlapping.getChunkStart());
+
+                    // Stream the buffer into the data stream.
+                    incomingBuffer.limit((spanOverlapping.getBlockEnd() > spanOverlapping.getBlockStart()) ? bytesInIncomingBuffer : spanOverlapping.getBlockOffsetEnd());
+                    incomingBuffer.position(spanOverlapping.getBlockOffsetStart());
+                    buffer.put(incomingBuffer);
+
+                    // Add the endcap for this transfer.
+                    blockOffsets.add(buffer.position());
+                    blockPositions.add(spanOverlapping.getChunkEnd());
                 }
 
-                // Remove the last position in the list and add in the last read position, in case the two are different.
-                blockOffsets.removeLast();
-                blockOffsets.add(buffer.position());
-                blockPositions.removeLast();
-                blockPositions.add(lastBlockAddress << 16 | blockOffsetStart);
-
-                // Stream the buffer into the data stream.
-                incomingBuffer.position(blockOffsetStart);
-                incomingBuffer.limit(Math.min(incomingBuffer.limit(),blockOffsetEnd));
-                buffer.put(incomingBuffer);
-
-                // Then, add the last position read to the very end of the list, just past the end of the last buffer.
-                blockOffsets.add(buffer.position());
-                blockPositions.add(endOfRead);
-
                 // Set up the buffer for reading.
                 buffer.flip();
-                bufferFilled = true;
 
                 lock.notify();
             }
@@ -447,12 +403,8 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
         if(remaining < length)
             return length - remaining;
 
-        // Otherwise, if at eof(), return -1.
-        else if(eof())
-            return -1;
-
-        // Otherwise, we must've hit a bug in the system.
-        throw new ReviewedStingException("BUG: read returned no data, but eof() reports false.");
+        // Otherwise, return -1.
+        return -1;
     }
 
     public void close() {
@@ -472,20 +424,26 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
 
     private void waitForBufferFill() {
         synchronized(lock) {
-            bufferFilled = false;
             if(buffer.remaining() == 0 && !eof()) {
                 //System.out.printf("Thread %s is waiting for a buffer fill from position %d to buffer %s%n",Thread.currentThread().getId(),position.getBlockAddress(),this);
-                dispatcher.queueBlockLoad(position);
+                dispatcher.queueBlockLoad(accessPlan);
                 try {
                     lock.wait();
                 }
                 catch(InterruptedException ex) {
                     throw new ReviewedStingException("Interrupt occurred waiting for buffer to fill",ex);
                 }
-
-                if(bufferFilled && buffer.remaining() == 0)
-                    throw new RuntimeEOFException("No more data left in InputStream");
             }
         }
     }
+
+    /**
+     * Create an encoded BAM file pointer given the address of a BGZF block and an offset.
+     * @param blockAddress Physical address on disk of a BGZF block.
+     * @param blockOffset Offset into the uncompressed data stored in the BGZF block.
+     * @return 64-bit pointer encoded according to the BAM spec.
+     */
+    public static long makeFilePointer(final long blockAddress, final int blockOffset) {
+        return blockAddress << 16 | blockOffset;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
index ab4299802..81a37e53c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockLoader.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -70,29 +70,29 @@ class BlockLoader implements Runnable {
 
     public void run() {
         for(;;) {
-            SAMReaderPosition readerPosition = null;
+            BAMAccessPlan accessPlan = null;
             try {
-                readerPosition = dispatcher.claimNextWorkRequest();
-                FileInputStream inputStream = fileHandleCache.claimFileInputStream(readerPosition.getReader());
+                accessPlan = dispatcher.claimNextWorkRequest();
+                FileInputStream inputStream = fileHandleCache.claimFileInputStream(accessPlan.getReader());
 
-                long blockAddress = readerPosition.getBlockAddress();
+                //long blockAddress = readerPosition.getBlockAddress();
                 //System.out.printf("Thread %s: BlockLoader: copying bytes from %s at position %d into %s%n",Thread.currentThread().getId(),inputStream,blockAddress,readerPosition.getInputStream());
 
-                ByteBuffer compressedBlock = readBGZFBlock(inputStream,readerPosition.getBlockAddress());
+                ByteBuffer compressedBlock = readBGZFBlock(inputStream,accessPlan.getBlockAddress());
                 long nextBlockAddress = position(inputStream);
-                fileHandleCache.releaseFileInputStream(readerPosition.getReader(),inputStream);
+                fileHandleCache.releaseFileInputStream(accessPlan.getReader(),inputStream);
 
                 ByteBuffer block = decompress ? decompressBGZFBlock(compressedBlock) : compressedBlock;
                 int bytesCopied = block.remaining();
 
-                BlockInputStream bamInputStream = readerPosition.getInputStream();
-                bamInputStream.copyIntoBuffer(block,readerPosition,nextBlockAddress);
+                BlockInputStream bamInputStream = accessPlan.getInputStream();
+                bamInputStream.copyIntoBuffer(block,accessPlan,nextBlockAddress);
 
                 //System.out.printf("Thread %s: BlockLoader: copied %d bytes from %s at position %d into %s%n",Thread.currentThread().getId(),bytesCopied,inputStream,blockAddress,readerPosition.getInputStream());
             }
             catch(Throwable error) {
-                if(readerPosition != null && readerPosition.getInputStream() != null)
-                    readerPosition.getInputStream().reportException(error);
+                if(accessPlan != null && accessPlan.getInputStream() != null)
+                    accessPlan.getInputStream().reportException(error);
             }
         }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java
index 8d73b1b15..96b55674a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java
@@ -36,7 +36,7 @@ import java.util.Map;
  */
 public class ReadShard extends Shard {
     /**
-     * What is the maximum number of reads which should go into a read shard.
+     * What is the maximum number of reads per BAM file which should go into a read shard.
      */
     public static int MAX_READS = 10000;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index c040b53c4..a4681cffd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -567,9 +567,14 @@ public class SAMDataSource {
 
             if(threadAllocation.getNumIOThreads() > 0) {
                 BlockInputStream inputStream = readers.getInputStream(id);
-                inputStream.submitAccessPlan(new SAMReaderPosition(id,inputStream,(GATKBAMFileSpan)shard.getFileSpans().get(id)));
+                inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
+                BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
+                codec.setInputStream(inputStream);
+                iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
+            }
+            else {
+                iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
             }
-            iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
             if(shard.getGenomeLocs().size() > 0)
                 iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
             iteratorMap.put(readers.getReader(id), iterator);
@@ -577,8 +582,6 @@ public class SAMDataSource {
 
         MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
 
-
-
         return applyDecoratingIterators(shard.getReadMetrics(),
                 enableVerification,
                 readProperties.useOriginalBaseQualities(),
@@ -592,6 +595,49 @@ public class SAMDataSource {
                 readProperties.defaultBaseQualities());
     }
 
+    private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
+        private final BlockInputStream inputStream;
+        private final SAMFileReader reader;
+        private final BAMRecordCodec codec;
+        private SAMRecord nextRead;
+
+        private BAMCodecIterator(final BlockInputStream inputStream, final SAMFileReader reader, final BAMRecordCodec codec) {
+            this.inputStream = inputStream;
+            this.reader = reader;
+            this.codec = codec;
+            advance();
+        }
+
+        public boolean hasNext() {
+            return nextRead != null;
+        }
+
+        public SAMRecord next() {
+            if(!hasNext())
+                throw new NoSuchElementException("Unable to retrieve next record from BAMCodecIterator; input stream is empty");
+            SAMRecord currentRead = nextRead;
+            advance();
+            return currentRead;
+        }
+
+        public void close() {
+            // NO-OP.
+        }
+
+        public void remove() {
+            throw new UnsupportedOperationException("Unable to remove from BAMCodecIterator");
+        }
+
+        private void advance() {
+            final long startCoordinate = inputStream.getFilePointer();
+            nextRead = codec.decode();
+            final long stopCoordinate = inputStream.getFilePointer();
+
+            if(reader != null && nextRead != null)
+                PicardNamespaceUtils.setFileSource(nextRead,new SAMFileSource(reader,new GATKBAMFileSpan(new GATKChunk(startCoordinate,stopCoordinate))));
+        }
+    }
+
     /**
      * Filter reads based on user-specified criteria.
      *
@@ -871,12 +917,9 @@ public class SAMDataSource {
         public ReaderInitializer call() {
             final File indexFile = findIndexFile(readerID.samFile);
             try {
-                if (threadAllocation.getNumIOThreads() > 0) {
+                if (threadAllocation.getNumIOThreads() > 0)
                     blockInputStream = new BlockInputStream(dispatcher,readerID,false);
-                    reader = new SAMFileReader(blockInputStream,indexFile,false);
-                }
-                else
-                    reader = new SAMFileReader(readerID.samFile,indexFile,false);
+                reader = new SAMFileReader(readerID.samFile,indexFile,false);
             } catch ( RuntimeIOException e ) {
                 if ( e.getCause() != null && e.getCause() instanceof FileNotFoundException )
                     throw new UserException.CouldNotReadInputFile(readerID.samFile, e);

From 2f800b078c07c1a6fc1dd5ed97282dc17c177e7e Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 8 Feb 2012 15:27:16 -0500
Subject: [PATCH 32/67] Changes to default behavior of UG: multi-allelic mode
 is always on; max number of alternate alleles to genotype is 3; alleles in
 the SNP model are ranked by their likelihood sum (Guillermo will do this for
 indels); SB is computed again.

---
 ...elGenotypeLikelihoodsCalculationModel.java |  5 +-
 ...NPGenotypeLikelihoodsCalculationModel.java | 85 ++++++++++---------
 .../genotyper/UnifiedArgumentCollection.java  | 20 ++---
 .../walkers/genotyper/UnifiedGenotyper.java   |  2 +-
 .../genotyper/UnifiedGenotyperEngine.java     |  2 +-
 .../UnifiedGenotyperIntegrationTest.java      | 27 ++++--
 6 files changed, 72 insertions(+), 69 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 0422fbf03..49c131ce2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -55,9 +55,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
     private final boolean getAlleleListFromVCF;
 
     private boolean DEBUG = false;
-    private final boolean doMultiAllelicCalls;
+    private final boolean doMultiAllelicCalls = true;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
-    private final int maxAlternateAlleles;
     private PairHMMIndelErrorModel pairModel;
 
     private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
@@ -88,8 +87,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
         minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
         HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
         DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
-        maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
-        doMultiAllelicCalls = UAC.MULTI_ALLELIC;
 
         haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
         ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index ea53c815d..6f1f86c6d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -43,13 +43,24 @@ import java.util.*;
 
 public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
 
-    private boolean ALLOW_MULTIPLE_ALLELES;
-
     private final boolean useAlleleFromVCF;
 
+    final LikelihoodSum[] likelihoodSums = new LikelihoodSum[4];
+
+    private final class LikelihoodSum implements Comparable<LikelihoodSum> {
+        public double sum = 0.0;
+        public Allele base;
+
+        public LikelihoodSum(Allele base) { this.base = base; }
+
+        public int compareTo(LikelihoodSum other) {
+            final double diff = sum - other.sum;
+            return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
+        }
+    }
+
     protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
         super(UAC, logger);
-        ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
         useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
 
         // make sure the PL cache has been initialized with enough alleles
@@ -69,7 +80,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
         if ( !(priors instanceof DiploidSNPGenotypePriors) )
             throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
 
-        final boolean[] basesToUse = new boolean[4];
         final byte refBase = ref.getBase();
         final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
 
@@ -95,46 +105,40 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
 
         // find the alternate allele(s) that we should be using
         if ( alternateAlleleToUse != null ) {
-            basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
+            alleles.add(alternateAlleleToUse);
         } else if ( useAlleleFromVCF ) {
             final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
 
             // ignore places where we don't have a SNP
             if ( vc == null || !vc.isSNP() )
                 return null;
-
-            for ( Allele allele : vc.getAlternateAlleles() )
-                basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
+           
+            alleles.addAll(vc.getAlternateAlleles());
         } else {
 
-            determineAlternateAlleles(basesToUse, refBase, GLs);
-
-            // how many alternate alleles are we using?
-            int alleleCounter = Utils.countSetBits(basesToUse);
+            alleles.addAll(determineAlternateAlleles(refBase, GLs));
 
             // if there are no non-ref alleles...
-            if ( alleleCounter == 0 ) {
+            if ( alleles.size() == 1 ) {
                 // if we only want variants, then we don't need to calculate genotype likelihoods
                 if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
                     return builder.make();
 
                 // otherwise, choose any alternate allele (it doesn't really matter)
-                basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
+                alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
              }
         }
 
         // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
-        final int numAltAlleles = Utils.countSetBits(basesToUse);
-        final int[] alleleOrdering = new int[numAltAlleles + 1];
-        alleleOrdering[0] = indexOfRefBase;
-        int alleleOrderingIndex = 1;
-        int numLikelihoods = 1;
-        for ( int i = 0; i < 4; i++ ) {
-            if ( i != indexOfRefBase && basesToUse[i] ) {
-                alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
-                alleleOrdering[alleleOrderingIndex++] = i;
-                numLikelihoods += alleleOrderingIndex;
-            }
+        final int numAlleles = alleles.size();
+        final int numAltAlleles = numAlleles - 1;
+
+        final int[] alleleOrdering = new int[numAlleles];
+        int alleleOrderingIndex = 0;
+        int numLikelihoods = 0;
+        for ( Allele allele : alleles ) {
+            alleleOrdering[alleleOrderingIndex++] = BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0]);
+            numLikelihoods += alleleOrderingIndex;
         }
         builder.alleles(alleles);
 
@@ -165,13 +169,14 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
 
         return builder.genotypes(genotypes).make();
     }
-
-    // fills in the allelesToUse array
-    protected void determineAlternateAlleles(final boolean[] allelesToUse, final byte ref, final List<SampleGenotypeData> sampleDataList) {
+    
+    // determines the alleles to use
+    protected List<Allele> determineAlternateAlleles(final byte ref, final List<SampleGenotypeData> sampleDataList) {
 
         final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref);
         final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal();
-        final double[] likelihoodCounts = new double[4];
+        for ( int i = 0; i < 4; i++ )
+            likelihoodSums[i] = new LikelihoodSum(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
 
         // based on the GLs, find the alternate alleles with the most probability
         for ( SampleGenotypeData sampleData : sampleDataList ) {
@@ -180,25 +185,21 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
             if ( PLindexOfBestGL != PLindexOfRef ) {
                 int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL];
                 if ( alleles[0] != baseIndexOfRef )
-                    likelihoodCounts[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
+                    likelihoodSums[alleles[0]].sum += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
                 // don't double-count it
                 if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] )
-                    likelihoodCounts[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
+                    likelihoodSums[alleles[1]].sum += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
             }
         }
 
-        if ( ALLOW_MULTIPLE_ALLELES ) {
-            for ( int i = 0; i < 4; i++ ) {
-                if ( likelihoodCounts[i] > 0.0 ) {
-                    allelesToUse[i] = true;
-                }
-            }
-        } else {
-            // set the non-ref base which has the maximum sum of non-ref GLs
-            final int indexOfMax = MathUtils.maxElementIndex(likelihoodCounts);
-            if ( likelihoodCounts[indexOfMax] > 0.0 )
-                allelesToUse[indexOfMax] = true;
+        Collections.sort(Arrays.asList(likelihoodSums));
+        final List<Allele> allelesToUse = new ArrayList<Allele>(3);
+        for ( LikelihoodSum sum : likelihoodSums ) {
+            if ( sum.sum > 0.0 )
+                allelesToUse.add(sum.base);
         }
+
+        return allelesToUse;
     }
 
     public ReadBackedPileup createBAQedPileup( final ReadBackedPileup pileup ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 16159393f..82e411c25 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -84,8 +84,8 @@ public class UnifiedArgumentCollection {
     /**
      * This argument is not enabled by default because it increases the runtime by an appreciable amount.
      */
-    @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
-    public boolean COMPUTE_SLOD = false;
+    @Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
+    public boolean NO_SLOD = false;
 
     /**
      * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
@@ -103,21 +103,12 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
     public Double MAX_DELETION_FRACTION = 0.05;
 
-    /**
-     * The default behavior of the Unified Genotyper is to allow the genotyping of just one alternate allele in discovery mode; using this flag
-     * will enable the discovery of multiple alternate alleles.  Please note that this works for SNPs only and that it is still highly experimental.
-     * For advanced users only.
-     */
-    @Advanced
-    @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false)
-    public boolean MULTI_ALLELIC = false;
-
     /**
      * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
-     * then this site will be skipped and a warning printed.  Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
+     * then only this many alleles will be used.  Note that genotyping sites with many alternate alleles is both CPU and memory intensive.
      */
     @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
-    public int MAX_ALTERNATE_ALLELES = 5;
+    public int MAX_ALTERNATE_ALLELES = 3;
 
     // indel-related arguments
     /**
@@ -168,7 +159,7 @@ public class UnifiedArgumentCollection {
         uac.PCR_error = PCR_error;
         uac.GenotypingMode = GenotypingMode;
         uac.OutputMode = OutputMode;
-        uac.COMPUTE_SLOD = COMPUTE_SLOD;
+        uac.NO_SLOD = NO_SLOD;
         uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
         uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
         uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
@@ -185,7 +176,6 @@ public class UnifiedArgumentCollection {
         // todo- arguments to remove
         uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
         uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
-        uac.MULTI_ALLELIC = MULTI_ALLELIC;
         return uac;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index b3f0954a2..1106fcb52 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -240,7 +240,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
         headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
 
         // annotation (INFO) fields from UnifiedGenotyper
-        if ( UAC.COMPUTE_SLOD )
+        if ( !UAC.NO_SLOD )
             headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
         headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 2da4b66c4..2eba6d884 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -407,7 +407,7 @@ public class UnifiedGenotyperEngine {
         if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
             attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
 
-        if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
+        if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
             //final boolean DEBUG_SLOD = false;
 
             // the overall lod
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 7285b0fb8..fd6738123 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -15,9 +15,9 @@ import java.util.Map;
 
 public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
-    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
-    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
-    private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
+    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
+    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
+    private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
 
     // --------------------------------------------------------------------------------------------------------------
     //
@@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultiSamplePilot1() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("653172b43b19003d9f7df6dab21f4b09"));
+                Arrays.asList("9ab4e98ce437a1c5e1eee338de49ee7e"));
         executeTest("test MultiSample Pilot1", spec);
     }
 
@@ -56,6 +56,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         executeTest("test SingleSample Pilot2", spec);
     }
 
+    @Test
+    public void testMultipleSNPAlleles() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
+                Arrays.asList("aabc4b3a312aba18b78e14750d8c8e62"));
+        executeTest("test Multiple SNP alleles", spec);
+    }
+
     // --------------------------------------------------------------------------------------------------------------
     //
     // testing compressed output
@@ -114,8 +122,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testCallingParameters() {
         HashMap<String, String> e = new HashMap<String, String>();
-        e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
-        e.put( "--computeSLOD", "6172d2f3d370132f4c57a26aa94c256e" );
+        e.put( "--min_base_quality_score 26", "258c1b33349eb3b2d395ec4d69302725" );
 
         for ( Map.Entry<String, String> entry : e.entrySet() ) {
             WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -125,6 +132,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         }
     }
 
+    @Test
+    public void testSLOD() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
+                Arrays.asList("6172d2f3d370132f4c57a26aa94c256e"));
+        executeTest("test SLOD", spec);
+    }
+
     @Test
     public void testOutputParameter() {
         HashMap<String, String> e = new HashMap<String, String>();

From d561914d4fb49b169e75487ee4286762cf6c74b7 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 8 Feb 2012 23:28:55 -0500
Subject: [PATCH 35/67] Revert "First implementation of GATKReportGatherer"

premature push from my part. Roger is still working on the new format and we need to update the other tools to operate correctly with the new GATKReport.

This reverts commit aea0de314220810c2666055dc75f04f9010436ad.
---
 .../sting/gatk/report/GATKReport.java         |  65 ++---
 .../sting/gatk/report/GATKReportGatherer.java |  46 ----
 .../sting/gatk/report/GATKReportTable.java    | 252 +++++++-----------
 .../sting/gatk/report/GATKReportUnitTest.java | 128 ++-------
 4 files changed, 134 insertions(+), 357 deletions(-)
 delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index c0abe7450..608b5d1d0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -24,8 +24,7 @@ public class GATKReport {
 
     /**
      * Create a new GATKReport with the contents of a GATKReport on disk.
-     *
-     * @param filename the path to the file to load
+     * @param filename  the path to the file to load
      */
     public GATKReport(String filename) {
         this(new File(filename));
@@ -33,8 +32,7 @@ public class GATKReport {
 
     /**
      * Create a new GATKReport with the contents of a GATKReport on disk.
-     *
-     * @param file the file to load
+     * @param file  the file to load
      */
     public GATKReport(File file) {
         loadReport(file);
@@ -42,8 +40,7 @@ public class GATKReport {
 
     /**
      * Load a GATKReport file from disk
-     *
-     * @param file the file to load
+     * @param file  the file to load
      */
     private void loadReport(File file) {
         try {
@@ -51,11 +48,12 @@ public class GATKReport {
 
             GATKReportTable table = null;
             String[] header = null;
+            int id = 0;
             GATKReportVersion version = null;
             List<Integer> columnStarts = null;
 
             String line;
-            while ((line = reader.readLine()) != null) {
+            while ( (line = reader.readLine()) != null ) {
 
                 if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
 
@@ -73,7 +71,7 @@ public class GATKReport {
 
                     header = null;
                     columnStarts = null;
-                } else if (line.trim().isEmpty()) {
+                } else if ( line.trim().isEmpty() ) {
                     // do nothing
                 } else {
                     if (table != null) {
@@ -99,22 +97,19 @@ public class GATKReport {
                         if (header == null) {
                             header = splitLine;
 
-                            // Set the first column as the primary key
-                            table.addPrimaryKey(header[0]);
-                            // Set every other column as column
-                            for (int i = 1; i < header.length; i++) {
-                                table.addColumn(header[i], "");
+                            table.addPrimaryKey("id", false);
+
+                            for ( String columnName : header ) {
+                                table.addColumn(columnName, "");
                             }
 
+                            id = 0;
                         } else {
-                            //Get primary key Value from the current line array
-                            String primaryKey = splitLine[0];
-                            //Input all the remaining values
-                            for (int columnIndex = 1; columnIndex < header.length; columnIndex++) {
-                                table.set(primaryKey, header[columnIndex], splitLine[columnIndex]);
+                            for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
+                                table.set(id, header[columnIndex], splitLine[columnIndex]);
                             }
 
-
+                            id++;
                         }
                     }
                 }
@@ -129,8 +124,8 @@ public class GATKReport {
     /**
      * Add a new table to the collection
      *
-     * @param tableName        the name of the table
-     * @param tableDescription the description of the table
+     * @param tableName  the name of the table
+     * @param tableDescription  the description of the table
      */
     public void addTable(String tableName, String tableDescription) {
         addTable(tableName, tableDescription, true);
@@ -144,7 +139,7 @@ public class GATKReport {
     /**
      * Return true if table with a given name exists
      *
-     * @param tableName the name of the table
+     * @param tableName  the name of the table
      * @return true if the table exists, false otherwise
      */
     public boolean hasTable(String tableName) {
@@ -154,8 +149,8 @@ public class GATKReport {
     /**
      * Return a table with a given name
      *
-     * @param tableName the name of the table
-     * @return the table object
+     * @param tableName  the name of the table
+     * @return  the table object
      */
     public GATKReportTable getTable(String tableName) {
         GATKReportTable table = tables.get(tableName);
@@ -167,7 +162,7 @@ public class GATKReport {
     /**
      * Print all tables contained within this container to a PrintStream
      *
-     * @param out the PrintStream to which the tables should be written
+     * @param out  the PrintStream to which the tables should be written
      */
     public void print(PrintStream out) {
         for (GATKReportTable table : tables.values()) {
@@ -180,24 +175,4 @@ public class GATKReport {
     public Collection<GATKReportTable> getTables() {
         return tables.values();
     }
-
-    public void combineWith(GATKReport input) {
-
-        // For every input table, add values
-        System.out.println("This.tables: keySet");
-        for (String s : tables.keySet())
-            System.out.println(s);
-
-        // todo test tables exist
-
-
-        for (String tableName : input.tables.keySet()) {
-            System.out.println("Input table key: " + tableName);
-            if (tables.containsKey(tableName))
-                tables.get(tableName).mergeRows(input.getTable(tableName));
-            else
-                throw new ReviewedStingException("Failed to combine GATKReport, tables don't match!");
-        }
-
-    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java
deleted file mode 100644
index 0d15971ae..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportGatherer.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package org.broadinstitute.sting.gatk.report;
-
-import org.broadinstitute.sting.commandline.Gatherer;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.PrintStream;
-import java.util.List;
-
-/**
- * Created by IntelliJ IDEA.
- * User: roger
- * Date: 1/9/12
- * Time: 11:17 PM
- * To change this template use File | Settings | File Templates.
- */
-public class GATKReportGatherer extends Gatherer {
-    @Override
-    public void gather(List<File> inputs, File output) {
-        //Combines inputs GATKReport to one output
-
-        PrintStream o;
-        try {
-            o = new PrintStream(output);
-        } catch (FileNotFoundException e) {
-            throw new UserException("File to be output by CoverageByRG Gather function was not found");
-        }
-
-        GATKReport current = new GATKReport();
-        boolean isFirst = true;
-        for (File input : inputs) {
-
-            // If the table is empty
-            if (isFirst) {
-                current = new GATKReport(input);
-                isFirst = false;
-            } else {
-                GATKReport toAdd = new GATKReport(input);
-                current.combineWith(toAdd);
-            }
-        }
-
-        current.print(o);
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
index ac18891d7..b72b20e0b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
@@ -4,10 +4,7 @@ import org.apache.commons.lang.ObjectUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.io.PrintStream;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.TreeSet;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -15,12 +12,12 @@ import java.util.regex.Pattern;
  * A data structure that allows data to be collected over the course of a walker's computation, then have that data
  * written to a PrintStream such that it's human-readable, AWK-able, and R-friendly (given that you load it using the
  * GATKReport loader module).
- * <p/>
+ *
  * The goal of this object is to use the same data structure for both accumulating data during a walker's computation
  * and emitting that data to a file for easy analysis in R (or any other program/language that can take in a table of
  * results).  Thus, all of the infrastructure below is designed simply to make printing the following as easy as
  * possible:
- * <p/>
+ *
  * ##:GATKReport.v0.1 ErrorRatePerCycle : The error rate per sequenced position in the reads
  * cycle  errorrate.61PA8.7         qualavg.61PA8.7
  * 0      0.007451835696110506      25.474613284804366
@@ -32,60 +29,60 @@ import java.util.regex.Pattern;
  * 6      5.452562704471102E-4      36.1217248908297
  * 7      5.452562704471102E-4      36.1910480349345
  * 8      5.452562704471102E-4      36.00345705967977
- * <p/>
+ *
  * Here, we have a GATKReport table - a well-formatted, easy to read representation of some tabular data.  Every single
  * table has this same GATKReport.v0.1 header, which permits multiple files from different sources to be cat-ed
  * together, which makes it very easy to pull tables from different programs into R via a single file.
- * <p/>
+ *
  * ------------
  * Definitions:
- * <p/>
+ *
  * Table info:
- * The first line, structured as
- * ##:<report version> <table name> : <table description>
- * <p/>
+ *   The first line, structured as
+ *     ##:<report version> <table name> : <table description>
+ *
  * Table header:
- * The second line, specifying a unique name for each column in the table.
- * <p/>
- * The first column mentioned in the table header is the "primary key" column - a column that provides the unique
- * identifier for each row in the table.  Once this column is created, any element in the table can be referenced by
- * the row-column coordinate, i.e. "primary key"-"column name" coordinate.
- * <p/>
- * When a column is added to a table, a default value must be specified (usually 0).  This is the initial value for
- * an element in a column.  This permits operations like increment() and decrement() to work properly on columns that
- * are effectively counters for a particular event.
- * <p/>
- * Finally, the display property for each column can be set during column creation.  This is useful when a given
- * column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
- * In these cases, it's obviously necessary to store the value required for further computation, but it's not
- * necessary to actually print the intermediate column.
- * <p/>
+ *   The second line, specifying a unique name for each column in the table.
+ *
+ *   The first column mentioned in the table header is the "primary key" column - a column that provides the unique
+ *   identifier for each row in the table.  Once this column is created, any element in the table can be referenced by
+ *   the row-column coordinate, i.e. "primary key"-"column name" coordinate.
+ *
+ *   When a column is added to a table, a default value must be specified (usually 0).  This is the initial value for
+ *   an element in a column.  This permits operations like increment() and decrement() to work properly on columns that
+ *   are effectively counters for a particular event.
+ *
+ *   Finally, the display property for each column can be set during column creation.  This is useful when a given
+ *   column stores an intermediate result that will be used later on, perhaps to calculate the value of another column.
+ *   In these cases, it's obviously necessary to store the value required for further computation, but it's not
+ *   necessary to actually print the intermediate column.
+ *
  * Table body:
- * The values of the table itself.
- * <p/>
+ *   The values of the table itself.
+ *
  * ---------------
  * Implementation:
- * <p/>
+ *
  * The implementation of this table has two components:
- * 1. A TreeSet<Object> that stores all the values ever specified for the primary key.  Any get() operation that
- * refers to an element where the primary key object does not exist will result in its implicit creation.  I
- * haven't yet decided if this is a good idea...
- * <p/>
- * 2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents.  Each
- * GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
- * primary key and the column value.  This means that, given N columns, the primary key information is stored
- * N+1 times.  This is obviously wasteful and can likely be handled much more elegantly in future implementations.
- * <p/>
+ *   1. A TreeSet<Object> that stores all the values ever specified for the primary key.  Any get() operation that
+ *      refers to an element where the primary key object does not exist will result in its implicit creation.  I
+ *      haven't yet decided if this is a good idea...
+ *
+ *   2. A HashMap<String, GATKReportColumn> that stores a mapping from column name to column contents.  Each
+ *      GATKReportColumn is effectively a map (in fact, GATKReportColumn extends TreeMap<Object, Object>) between
+ *      primary key and the column value.  This means that, given N columns, the primary key information is stored
+ *      N+1 times.  This is obviously wasteful and can likely be handled much more elegantly in future implementations.
+ *
  * ------------------------------
  * Element and column operations:
- * <p/>
+ *
  * In addition to simply getting and setting values, this object also permits some simple operations to be applied to
  * individual elements or to whole columns.  For instance, an element can be easily incremented without the hassle of
  * calling get(), incrementing the obtained value by 1, and then calling set() with the new value.  Also, some vector
  * operations are supported.  For instance, two whole columns can be divided and have the result be set to a third
  * column.  This is especially useful when aggregating counts in two intermediate columns that will eventually need to
  * be manipulated row-by-row to compute the final column.
- * <p/>
+ *
  * Note: I've made no attempt whatsoever to make these operations efficient.  Right now, some of the methods check the
  * type of the stored object using an instanceof call and attempt to do the right thing.  Others cast the contents of
  * the cell to a Number, call the Number.toDouble() method and compute a result.  This is clearly not the ideal design,
@@ -95,9 +92,7 @@ import java.util.regex.Pattern;
  * @author Khalid Shakir
  */
 public class GATKReportTable {
-    /**
-     * REGEX that matches any table with an invalid name
-     */
+    /** REGEX that matches any table with an invalid name */
     public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
     private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
     private String tableName;
@@ -114,8 +109,8 @@ public class GATKReportTable {
     /**
      * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
      *
-     * @param name the name of the table or column
-     * @return true if the name is valid, false if otherwise
+     * @param name  the name of the table or column
+     * @return  true if the name is valid, false if otherwise
      */
     private boolean isValidName(String name) {
         Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
@@ -127,8 +122,8 @@ public class GATKReportTable {
     /**
      * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
      *
-     * @param description the name of the table or column
-     * @return true if the name is valid, false if otherwise
+     * @param description  the name of the table or column
+     * @return  true if the name is valid, false if otherwise
      */
     private boolean isValidDescription(String description) {
         Pattern p = Pattern.compile("\\r|\\n");
@@ -140,15 +135,15 @@ public class GATKReportTable {
     /**
      * Construct a new GATK report table with the specified name and description
      *
-     * @param tableName        the name of the table
-     * @param tableDescription the description of the table
+     * @param tableName  the name of the table
+     * @param tableDescription  the description of the table
      */
     public GATKReportTable(String tableName, String tableDescription) {
         this(tableName, tableDescription, true);
     }
 
     public GATKReportTable(String tableName, String tableDescription, boolean sortByPrimaryKey) {
-        if (!isValidName(tableName)) {
+         if (!isValidName(tableName)) {
             throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'.  GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
         }
 
@@ -174,7 +169,7 @@ public class GATKReportTable {
     /**
      * Add a primary key column.  This becomes the unique identifier for every column in the table.
      *
-     * @param primaryKeyName the name of the primary key column
+     * @param primaryKeyName  the name of the primary key column
      */
     public void addPrimaryKey(String primaryKeyName) {
         addPrimaryKey(primaryKeyName, true);
@@ -183,8 +178,8 @@ public class GATKReportTable {
     /**
      * Add an optionally visible primary key column.  This becomes the unique identifier for every column in the table, and will always be printed as the first column.
      *
-     * @param primaryKeyName the name of the primary key column
-     * @param display        should this primary key be displayed?
+     * @param primaryKeyName  the name of the primary key column
+     * @param display should this primary key be displayed?
      */
     public void addPrimaryKey(String primaryKeyName, boolean display) {
         if (!isValidName(primaryKeyName)) {
@@ -200,7 +195,6 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
-     *
      * @param dottedColumnValues Period concatenated values.
      * @return The first primary key matching the column values or throws an exception.
      */
@@ -214,7 +208,6 @@ public class GATKReportTable {
     /**
      * Returns true if there is at least on row with the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
-     *
      * @param dottedColumnValues Period concatenated values.
      * @return true if there is at least one row matching the columns.
      */
@@ -225,7 +218,6 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the dotted column values.
      * Ex: dbsnp.eval.called.all.novel.all
-     *
      * @param dottedColumnValues Period concatenated values.
      * @return The first primary key matching the column values or null.
      */
@@ -236,7 +228,6 @@ public class GATKReportTable {
     /**
      * Returns the first primary key matching the column values.
      * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
-     *
      * @param columnValues column values.
      * @return The first primary key matching the column values.
      */
@@ -244,7 +235,7 @@ public class GATKReportTable {
         for (Object primaryKey : primaryKeyColumn) {
             boolean matching = true;
             for (int i = 0; matching && i < columnValues.length; i++) {
-                matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i + 1));
+                matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
             }
             if (matching)
                 return primaryKey;
@@ -255,8 +246,8 @@ public class GATKReportTable {
     /**
      * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
      *
-     * @param columnName   the name of the column
-     * @param defaultValue the default value for the column
+     * @param columnName  the name of the column
+     * @param defaultValue  the default value for the column
      */
     public void addColumn(String columnName, Object defaultValue) {
         addColumn(columnName, defaultValue, null);
@@ -265,13 +256,12 @@ public class GATKReportTable {
     public void addColumn(String columnName, Object defaultValue, String format) {
         addColumn(columnName, defaultValue, true, format);
     }
-
     /**
      * Add a column to the report, specify the default column value, and specify whether the column should be displayed in the final output (useful when intermediate columns are necessary for later calculations, but are not required to be in the output file.
      *
-     * @param columnName   the name of the column
-     * @param defaultValue the default value of the column
-     * @param display      if true - the column will be displayed; if false - the column will be hidden
+     * @param columnName  the name of the column
+     * @param defaultValue  the default value of the column
+     * @param display  if true - the column will be displayed; if false - the column will be hidden
      */
     public void addColumn(String columnName, Object defaultValue, boolean display) {
         addColumn(columnName, defaultValue, display, null);
@@ -287,8 +277,8 @@ public class GATKReportTable {
     /**
      * Check if the requested element exists, and if not, create it.
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
      */
     private void verifyEntry(Object primaryKey, String columnName) {
         if (!columns.containsKey(columnName)) {
@@ -309,9 +299,9 @@ public class GATKReportTable {
     /**
      * Set the value for a given position in the table
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
-     * @param value      the value to set
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
+     * @param value  the value to set
      */
     public void set(Object primaryKey, String columnName, Object value) {
         verifyEntry(primaryKey, columnName);
@@ -322,13 +312,13 @@ public class GATKReportTable {
     /**
      * Get a value from the given position in the table
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
-     * @return the value stored at the specified position in the table
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
+     * @return  the value stored at the specified position in the table
      */
     public Object get(Object primaryKey, String columnName) {
         verifyEntry(primaryKey, columnName);
-
+        
         return columns.get(columnName).get(primaryKey);
     }
 
@@ -337,7 +327,7 @@ public class GATKReportTable {
      *
      * @param primaryKey  the primary key value
      * @param columnIndex the index of the column
-     * @return the value stored at the specified position in the table
+     * @return  the value stored at the specified position in the table
      */
     private Object get(Object primaryKey, int columnIndex) {
         return columns.getByIndex(columnIndex).get(primaryKey);
@@ -346,8 +336,8 @@ public class GATKReportTable {
     /**
      * Increment an element in the table.  This implementation is awful - a functor would probably be better.
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
      */
     public void increment(Object primaryKey, String columnName) {
         Object oldValue = get(primaryKey, columnName);
@@ -375,8 +365,8 @@ public class GATKReportTable {
     /**
      * Decrement an element in the table.  This implementation is awful - a functor would probably be better.
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
      */
     public void decrement(Object primaryKey, String columnName) {
         Object oldValue = get(primaryKey, columnName);
@@ -404,9 +394,9 @@ public class GATKReportTable {
     /**
      * Add the specified value to an element in the table
      *
-     * @param primaryKey the primary key value
-     * @param columnName the name of the column
-     * @param valueToAdd the value to add
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
+     * @param valueToAdd  the value to add
      */
     public void add(Object primaryKey, String columnName, Object valueToAdd) {
         Object oldValue = get(primaryKey, columnName);
@@ -434,8 +424,8 @@ public class GATKReportTable {
     /**
      * Subtract the specified value from an element in the table
      *
-     * @param primaryKey      the primary key value
-     * @param columnName      the name of the column
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
      * @param valueToSubtract the value to subtract
      */
     public void subtract(Object primaryKey, String columnName, Object valueToSubtract) {
@@ -464,9 +454,9 @@ public class GATKReportTable {
     /**
      * Multiply the specified value to an element in the table
      *
-     * @param primaryKey      the primary key value
-     * @param columnName      the name of the column
-     * @param valueToMultiply the value to multiply by
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
+     * @param valueToMultiply  the value to multiply by
      */
     public void multiply(Object primaryKey, String columnName, Object valueToMultiply) {
         Object oldValue = get(primaryKey, columnName);
@@ -494,9 +484,9 @@ public class GATKReportTable {
     /**
      * Divide the specified value from an element in the table
      *
-     * @param primaryKey    the primary key value
-     * @param columnName    the name of the column
-     * @param valueToDivide the value to divide by
+     * @param primaryKey  the primary key value
+     * @param columnName  the name of the column
+     * @param valueToDivide  the value to divide by
      */
     public void divide(Object primaryKey, String columnName, Object valueToDivide) {
         Object oldValue = get(primaryKey, columnName);
@@ -524,9 +514,9 @@ public class GATKReportTable {
     /**
      * Add two columns to each other and set the results to a third column
      *
-     * @param columnToSet the column that should hold the results
-     * @param augend      the column that shall be the augend
-     * @param addend      the column that shall be the addend
+     * @param columnToSet  the column that should hold the results
+     * @param augend  the column that shall be the augend
+     * @param addend  the column that shall be the addend
      */
     public void addColumns(String columnToSet, String augend, String addend) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -542,8 +532,8 @@ public class GATKReportTable {
     /**
      * Subtract one column from another and set the results to a third column
      *
-     * @param columnToSet the column that should hold the results
-     * @param minuend     the column that shall be the minuend (the a in a - b)
+     * @param columnToSet  the column that should hold the results
+     * @param minuend  the column that shall be the minuend (the a in a - b)
      * @param subtrahend  the column that shall be the subtrahend (the b in a - b)
      */
     public void subtractColumns(String columnToSet, String minuend, String subtrahend) {
@@ -561,8 +551,8 @@ public class GATKReportTable {
      * Multiply two columns by each other and set the results to a third column
      *
      * @param columnToSet  the column that should hold the results
-     * @param multiplier   the column that shall be the multiplier
-     * @param multiplicand the column that shall be the multiplicand
+     * @param multiplier  the column that shall be the multiplier
+     * @param multiplicand  the column that shall be the multiplicand
      */
     public void multiplyColumns(String columnToSet, String multiplier, String multiplicand) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -578,9 +568,9 @@ public class GATKReportTable {
     /**
      * Divide two columns by each other and set the results to a third column
      *
-     * @param columnToSet       the column that should hold the results
-     * @param numeratorColumn   the column that shall be the numerator
-     * @param denominatorColumn the column that shall be the denominator
+     * @param columnToSet  the column that should hold the results
+     * @param numeratorColumn  the column that shall be the numerator
+     * @param denominatorColumn  the column that shall be the denominator
      */
     public void divideColumns(String columnToSet, String numeratorColumn, String denominatorColumn) {
         for (Object primaryKey : primaryKeyColumn) {
@@ -595,11 +585,10 @@ public class GATKReportTable {
 
     /**
      * Return the print width of the primary key column
-     *
-     * @return the width of the primary key column
+     * @return  the width of the primary key column
      */
     public int getPrimaryKeyColumnWidth() {
-        int maxWidth = getPrimaryKeyName().length();
+        int maxWidth = primaryKeyName.length();
 
         for (Object primaryKey : primaryKeyColumn) {
             int width = primaryKey.toString().length();
@@ -615,7 +604,7 @@ public class GATKReportTable {
     /**
      * Write the table to the PrintStream, formatted nicely to be human-readable, AWK-able, and R-friendly.
      *
-     * @param out the PrintStream to which the table should be written
+     * @param out  the PrintStream to which the table should be written
      */
     public void write(PrintStream out) {
         // Get the column widths for everything
@@ -631,15 +620,13 @@ public class GATKReportTable {
         // Emit the table header, taking into account the padding requirement if the primary key is a hidden column
         boolean needsPadding = false;
         if (primaryKeyDisplay) {
-            out.printf(primaryKeyFormat, getPrimaryKeyName());
+            out.printf(primaryKeyFormat, primaryKeyName);
             needsPadding = true;
         }
 
         for (String columnName : columns.keySet()) {
             if (columns.get(columnName).isDisplayable()) {
-                if (needsPadding) {
-                    out.printf("  ");
-                }
+                if (needsPadding) { out.printf("  "); }
                 out.printf(columnFormats.get(columnName).getNameFormat(), columnName);
 
                 needsPadding = true;
@@ -658,9 +645,7 @@ public class GATKReportTable {
 
             for (String columnName : columns.keySet()) {
                 if (columns.get(columnName).isDisplayable()) {
-                    if (needsPadding) {
-                        out.printf("  ");
-                    }
+                    if (needsPadding) { out.printf("  "); }
                     String value = columns.get(columnName).getStringValue(primaryKey);
                     out.printf(columnFormats.get(columnName).getValueFormat(), value);
 
@@ -690,49 +675,4 @@ public class GATKReportTable {
     public GATKReportColumns getColumns() {
         return columns;
     }
-
-    public void mergeRows(GATKReportTable input) {
-        /*
-         * This function is different from addRowsFrom because we will add the ability to sum,average, etc rows
-         * TODO: Add other combining algorithms
-         */
-
-        // Make sure the columns match AND the Primary Key
-        if (input.getColumns().keySet().equals(this.getColumns().keySet()) &&
-                input.getPrimaryKeyName().equals(this.getPrimaryKeyName())) {
-            this.addRowsFrom(input);
-        } else
-            throw new ReviewedStingException("Failed to combine GATKReportTable, columns don't match!");
-    }
-
-    public void addRowsFrom(GATKReportTable input) {
-        // add column by column
-
-        // For every column
-        for (String columnKey : input.getColumns().keySet()) {
-            GATKReportColumn current = this.getColumns().get(columnKey);
-            GATKReportColumn toAdd = input.getColumns().get(columnKey);
-            // We want to take the current column and add all the values from input
-
-            // The column is a map of values <Key, Value>
-            for (Object rowKey : toAdd.keySet()) {
-                // We add every value from toAdd to the current
-                if (!current.containsKey(rowKey)) {
-                    this.set(rowKey, columnKey, toAdd.get(rowKey));
-                    System.out.printf("Putting row with PK: %s \n", rowKey);
-                } else {
-
-                    // TODO we should be able to handle combining data by adding, averaging, etc.
-                    this.set(rowKey, columnKey, toAdd.get(rowKey));
-
-                    System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);
-                }
-            }
-        }
-
-    }
-
-    public String getPrimaryKeyName() {
-        return primaryKeyName;
-    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
index b9a89fcfe..b3b9ab555 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java
@@ -49,23 +49,23 @@ public class GATKReportUnitTest extends BaseTest {
 
     @DataProvider(name = "rightAlignValues")
     public Object[][] getRightAlignValues() {
-        return new Object[][]{
-                new Object[]{null, true},
-                new Object[]{"null", true},
-                new Object[]{"NA", true},
-                new Object[]{"0", true},
-                new Object[]{"0.0", true},
-                new Object[]{"-0", true},
-                new Object[]{"-0.0", true},
-                new Object[]{String.valueOf(Long.MAX_VALUE), true},
-                new Object[]{String.valueOf(Long.MIN_VALUE), true},
-                new Object[]{String.valueOf(Float.MIN_NORMAL), true},
-                new Object[]{String.valueOf(Double.MAX_VALUE), true},
-                new Object[]{String.valueOf(Double.MIN_VALUE), true},
-                new Object[]{String.valueOf(Double.POSITIVE_INFINITY), true},
-                new Object[]{String.valueOf(Double.NEGATIVE_INFINITY), true},
-                new Object[]{String.valueOf(Double.NaN), true},
-                new Object[]{"hello", false}
+        return new Object[][] {
+                new Object[] {null, true},
+                new Object[] {"null", true},
+                new Object[] {"NA", true},
+                new Object[] {"0", true},
+                new Object[] {"0.0", true},
+                new Object[] {"-0", true},
+                new Object[] {"-0.0", true},
+                new Object[] {String.valueOf(Long.MAX_VALUE), true},
+                new Object[] {String.valueOf(Long.MIN_VALUE), true},
+                new Object[] {String.valueOf(Float.MIN_NORMAL), true},
+                new Object[] {String.valueOf(Double.MAX_VALUE), true},
+                new Object[] {String.valueOf(Double.MIN_VALUE), true},
+                new Object[] {String.valueOf(Double.POSITIVE_INFINITY), true},
+                new Object[] {String.valueOf(Double.NEGATIVE_INFINITY), true},
+                new Object[] {String.valueOf(Double.NaN), true},
+                new Object[] {"hello", false}
         };
     }
 
@@ -73,96 +73,4 @@ public class GATKReportUnitTest extends BaseTest {
     public void testIsRightAlign(String value, boolean expected) {
         Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
     }
-
-    @Test
-    public void testGATKReportGatherer() {
-
-        /*
-        GATKReportTable actual1 = new GATKReportTable("TableName", "Description");
-        actual1.addPrimaryKey("key");
-        actual1.addColumn("colA", 0);
-        actual1.addColumn("colB", 0);
-        actual1.set("row1", "colA", 1);
-        actual1.set("row1", "colB", 2);
-
-        GATKReportTable actual2 = new GATKReportTable("TableName", "Description");
-        actual2.addPrimaryKey("key");
-        actual2.addColumn("colA", 0);
-        actual2.addColumn("colB", 0);
-        actual2.set("row2", "colA", 3);
-        actual2.set("row2", "colB", 4);
-
-        GATKReportTable actual3 = new GATKReportTable("TableName", "Description");
-        actual3.addPrimaryKey("key");
-        actual3.addColumn("colA", 0);
-        actual3.addColumn("colB", 0);
-        actual3.set("row3", "colA", 5);
-        actual3.set("row3", "colB", 6);
-
-        actual1.mergeRows(actual2);
-        actual1.mergeRows(actual3);
-        actual1.write(System.out);
-        */
-
-        GATKReportTable expected = new GATKReportTable("TableName", "Description");
-        expected.addPrimaryKey("key");
-        expected.addColumn("colA", 0);
-        expected.addColumn("colB", 0);
-        expected.set("row1", "colA", 1);
-        expected.set("row1", "colB", 2);
-        expected.set("row2", "colA", 3);
-        expected.set("row2", "colB", 4);
-        expected.set("row3", "colA", 5);
-        expected.set("row3", "colB", 6);
-        expected.write(System.out);
-
-        GATKReport report1, report2, report3;
-        report1 = new GATKReport();
-        report1.addTable("TableName", "Description");
-        report1.getTable("TableName").addPrimaryKey("key");
-        report1.getTable("TableName").addColumn("colA", 0);
-        report1.getTable("TableName").addColumn("colB", 0);
-        report1.getTable("TableName").set("row1", "colA", 1);
-        report1.getTable("TableName").set("row1", "colB", 2);
-
-        report2 = new GATKReport();
-        report2.addTable("TableName", "Description");
-        report2.getTable("TableName").addPrimaryKey("key");
-        report2.getTable("TableName").addColumn("colA", 0);
-        report2.getTable("TableName").addColumn("colB", 0);
-        report2.getTable("TableName").set("row2", "colA", 3);
-        report2.getTable("TableName").set("row2", "colB", 4);
-
-        report3 = new GATKReport();
-        report3.addTable("TableName", "Description");
-        report3.getTable("TableName").addPrimaryKey("key");
-        report3.getTable("TableName").addColumn("colA", 0);
-        report3.getTable("TableName").addColumn("colB", 0);
-        report3.getTable("TableName").set("row3", "colA", 5);
-        report3.getTable("TableName").set("row3", "colB", 6);
-
-        report1.combineWith(report2);
-        report1.combineWith(report3);
-
-        report1.print(System.out);
-        /*
-          File a = new File("/home/roger/tbls/a.tbl");
-          File b = new File("/home/roger/tbls/b.tbl");
-          File c = new File("/home/roger/tbls/c.tbl");
-          File out = new File("/home/roger/tbls/out.tbl");
-
-
-          List<File> FileList = new ArrayList<File>();
-          FileList.add(a);
-          FileList.add(b);
-          FileList.add(c);
-
-          GATKReportGatherer gatherer = new GATKReportGatherer();
-          gatherer.gather(FileList, out);
-          System.out.print(out);
-        */
-
-        //Assert.assertEquals(1,1);
-
-    }
-}
\ No newline at end of file
+}

From b57d4250bfae1ea8c6cdb5da8fca83e5affa01ea Mon Sep 17 00:00:00 2001
From: Matt Hanna <hanna@broadinstitute.org>
Date: Thu, 9 Feb 2012 11:24:52 -0500
Subject: [PATCH 37/67] Documentation request by Eric.  At each stage of the
 GATK where filtering occurs, added documentation suggesting the goal of the
 filtering along with examples of suggested inputs and outputs.

---
 .../gatk/datasources/providers/LocusView.java |   9 +-
 .../IntervalOverlapFilteringIterator.java     | 203 ++++++++++++++++++
 .../gatk/datasources/reads/SAMDataSource.java | 162 --------------
 .../sting/gatk/executive/WindowMaker.java     |  18 +-
 .../sting/gatk/traversals/TraverseLoci.java   |   4 +-
 5 files changed, 228 insertions(+), 168 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalOverlapFilteringIterator.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
index f9ed0cb74..a3ce6dd27 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
@@ -25,9 +25,14 @@ import java.util.NoSuchElementException;
  */
 
 /**
- * A queue of locus context entries.
+ * The two goals of the LocusView are as follows:
+ * 1) To provide a 'trigger track' iteration interface so that TraverseLoci can easily switch
+ *    between iterating over all bases in a region, only covered bases in a region covered by
+ *    reads, only bases in a region covered by RODs, or any other sort of trigger track
+ *    implementation one can think of.
+ * 2) To manage the copious number of iterators that have to be jointly pulled through the
+ *    genome to make a locus traversal function.
  */
-
 public abstract class LocusView extends LocusIterator implements View {
     /**
      * The locus bounding this view.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalOverlapFilteringIterator.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalOverlapFilteringIterator.java
new file mode 100644
index 000000000..4005f1c32
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalOverlapFilteringIterator.java
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads;
+
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * High efficiency filtering iterator designed to filter out reads only included
+ * in the query results due to the granularity of the BAM index.
+ *
+ * Built into the BAM index is a notion of 16kbase granularity -- an index query for
+ * two regions contained within a 16kbase chunk (say, chr1:5-10 and chr1:11-20) will
+ * return exactly the same regions within the BAM file.  This iterator is optimized
+ * to subtract out reads which do not at all overlap the interval list passed to the
+ * constructor.
+ *
+ * Example:
+ * interval list: chr20:6-10
+ * Reads that would pass through the filter: chr20:6-10, chr20:1-15, chr20:1-7, chr20:8-15.
+ * Reads that would be discarded by the filter: chr20:1-5, chr20:11-15.
+ */
+class IntervalOverlapFilteringIterator implements CloseableIterator<SAMRecord> {
+    /**
+     * The wrapped iterator.
+     */
+    private CloseableIterator<SAMRecord> iterator;
+
+    /**
+     * The next read, queued up and ready to go.
+     */
+    private SAMRecord nextRead;
+
+    /**
+     * Rather than using the straight genomic bounds, use filter out only mapped reads.
+     */
+    private boolean keepOnlyUnmappedReads;
+
+    /**
+     * Custom representation of interval bounds.
+     * Makes it simpler to track current position.
+     */
+    private int[] intervalContigIndices;
+    private int[] intervalStarts;
+    private int[] intervalEnds;
+
+    /**
+     * Position within the interval list.
+     */
+    private int currentBound = 0;
+
+    public IntervalOverlapFilteringIterator(CloseableIterator<SAMRecord> iterator, List<GenomeLoc> intervals) {
+        this.iterator = iterator;
+
+        // Look at the interval list to detect whether we should worry about unmapped reads.
+        // If we find a mix of mapped/unmapped intervals, throw an exception.
+        boolean foundMappedIntervals = false;
+        for(GenomeLoc location: intervals) {
+            if(! GenomeLoc.isUnmapped(location))
+                foundMappedIntervals = true;
+            keepOnlyUnmappedReads |= GenomeLoc.isUnmapped(location);
+        }
+
+
+        if(foundMappedIntervals) {
+            if(keepOnlyUnmappedReads)
+                throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals.  Please apply this filter to only mapped or only unmapped reads");
+            this.intervalContigIndices = new int[intervals.size()];
+            this.intervalStarts = new int[intervals.size()];
+            this.intervalEnds = new int[intervals.size()];
+            int i = 0;
+            for(GenomeLoc interval: intervals) {
+                intervalContigIndices[i] = interval.getContigIndex();
+                intervalStarts[i] = interval.getStart();
+                intervalEnds[i] = interval.getStop();
+                i++;
+            }
+        }
+
+        advance();
+    }
+
+    public boolean hasNext() {
+        return nextRead != null;
+    }
+
+    public SAMRecord next() {
+        if(nextRead == null)
+            throw new NoSuchElementException("No more reads left in this iterator.");
+        SAMRecord currentRead = nextRead;
+        advance();
+        return currentRead;
+    }
+
+    public void remove() {
+        throw new UnsupportedOperationException("Cannot remove from an IntervalOverlapFilteringIterator");
+    }
+
+
+    public void close() {
+        iterator.close();
+    }
+
+    private void advance() {
+        nextRead = null;
+
+        if(!iterator.hasNext())
+            return;
+
+        SAMRecord candidateRead = iterator.next();
+        while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
+            if(!keepOnlyUnmappedReads) {
+                // Mapped read filter; check against GenomeLoc-derived bounds.
+                if(readEndsOnOrAfterStartingBound(candidateRead)) {
+                    // This read ends after the current interval begins.
+                    // Promising, but this read must be checked against the ending bound.
+                    if(readStartsOnOrBeforeEndingBound(candidateRead)) {
+                        // Yes, this read is within both bounds.  This must be our next read.
+                        nextRead = candidateRead;
+                        break;
+                    }
+                    else {
+                        // Oops, we're past the end bound.  Increment the current bound and try again.
+                        currentBound++;
+                        continue;
+                    }
+                }
+            }
+            else {
+                // Found an unmapped read.  We're done.
+                if(candidateRead.getReadUnmappedFlag()) {
+                    nextRead = candidateRead;
+                    break;
+                }
+            }
+
+            // No more reads available.  Stop the search.
+            if(!iterator.hasNext())
+                break;
+
+            // No reasonable read found; advance the iterator.
+            candidateRead = iterator.next();
+        }
+    }
+
+    /**
+     * Check whether the read lies after the start of the current bound.  If the read is unmapped but placed, its
+     * end will be distorted, so rely only on the alignment start.
+     * @param read The read to position-check.
+     * @return True if the read starts after the current bounds.  False otherwise.
+     */
+    private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
+        return
+                // Read ends on a later contig, or...
+                read.getReferenceIndex() > intervalContigIndices[currentBound] ||
+                        // Read ends of this contig...
+                        (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
+                                // either after this location, or...
+                                (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
+                                        // read is unmapped but positioned and alignment start is on or after this start point.
+                                        (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
+    }
+
+    /**
+     * Check whether the read lies before the end of the current bound.
+     * @param read The read to position-check.
+     * @return True if the read starts after the current bounds.  False otherwise.
+     */
+    private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
+        return
+                // Read starts on a prior contig, or...
+                read.getReferenceIndex() < intervalContigIndices[currentBound] ||
+                        // Read starts on this contig and the alignment start is registered before this end point.
+                        (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index a4681cffd..27b9e7f77 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -39,7 +39,6 @@ import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.iterators.*;
 import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
-import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.SimpleTimer;
@@ -976,167 +975,6 @@ public class SAMDataSource {
      */
     private class ReadGroupMapping extends HashMap<String,String> {}
 
-    /**
-     * Filters out reads that do not overlap the current GenomeLoc.
-     * Note the custom implementation: BAM index querying returns all reads that could
-     * possibly overlap the given region (and quite a few extras).  In order not to drag
-     * down performance, this implementation is highly customized to its task.
-     */
-    private class IntervalOverlapFilteringIterator implements CloseableIterator<SAMRecord> {
-        /**
-         * The wrapped iterator.
-         */
-        private CloseableIterator<SAMRecord> iterator;
-
-        /**
-         * The next read, queued up and ready to go.
-         */
-        private SAMRecord nextRead;
-
-        /**
-         * Rather than using the straight genomic bounds, use filter out only mapped reads.
-         */
-        private boolean keepOnlyUnmappedReads;
-
-        /**
-         * Custom representation of interval bounds.
-         * Makes it simpler to track current position.
-         */
-        private int[] intervalContigIndices;
-        private int[] intervalStarts;
-        private int[] intervalEnds;
-
-        /**
-         * Position within the interval list.
-         */
-        private int currentBound = 0;
-
-        public IntervalOverlapFilteringIterator(CloseableIterator<SAMRecord> iterator, List<GenomeLoc> intervals) {
-            this.iterator = iterator;
-
-            // Look at the interval list to detect whether we should worry about unmapped reads.
-            // If we find a mix of mapped/unmapped intervals, throw an exception.
-            boolean foundMappedIntervals = false;
-            for(GenomeLoc location: intervals) {
-                if(! GenomeLoc.isUnmapped(location))
-                    foundMappedIntervals = true;
-                keepOnlyUnmappedReads |= GenomeLoc.isUnmapped(location);
-            }
-
-
-            if(foundMappedIntervals) {
-                if(keepOnlyUnmappedReads)
-                    throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals.  Please apply this filter to only mapped or only unmapped reads");
-                this.intervalContigIndices = new int[intervals.size()];
-                this.intervalStarts = new int[intervals.size()];
-                this.intervalEnds = new int[intervals.size()];
-                int i = 0;
-                for(GenomeLoc interval: intervals) {
-                    intervalContigIndices[i] = interval.getContigIndex();
-                    intervalStarts[i] = interval.getStart();
-                    intervalEnds[i] = interval.getStop();
-                    i++;
-                }
-            }
-
-            advance();
-        }
-
-        public boolean hasNext() {
-            return nextRead != null;
-        }
-
-        public SAMRecord next() {
-            if(nextRead == null)
-                throw new NoSuchElementException("No more reads left in this iterator.");
-            SAMRecord currentRead = nextRead;
-            advance();
-            return currentRead;
-        }
-
-        public void remove() {
-            throw new UnsupportedOperationException("Cannot remove from an IntervalOverlapFilteringIterator");
-        }
-
-
-        public void close() {
-            iterator.close();
-        }
-
-        private void advance() {
-            nextRead = null;
-
-            if(!iterator.hasNext())
-                return;
-
-            SAMRecord candidateRead = iterator.next();
-            while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
-                if(!keepOnlyUnmappedReads) {
-                    // Mapped read filter; check against GenomeLoc-derived bounds.
-                    if(readEndsOnOrAfterStartingBound(candidateRead)) {
-                        // This read ends after the current interval begins.
-                        // Promising, but this read must be checked against the ending bound.
-                        if(readStartsOnOrBeforeEndingBound(candidateRead)) {
-                            // Yes, this read is within both bounds.  This must be our next read.
-                            nextRead = candidateRead;
-                            break;
-                        }
-                        else {
-                            // Oops, we're past the end bound.  Increment the current bound and try again.
-                            currentBound++;
-                            continue;
-                        }
-                    }
-                }
-                else {
-                    // Found an unmapped read.  We're done.
-                    if(candidateRead.getReadUnmappedFlag()) {
-                        nextRead = candidateRead;
-                        break;
-                    }
-                }
-
-                // No more reads available.  Stop the search.
-                if(!iterator.hasNext())
-                    break;
-
-                // No reasonable read found; advance the iterator.
-                candidateRead = iterator.next();
-            }
-        }
-
-        /**
-         * Check whether the read lies after the start of the current bound.  If the read is unmapped but placed, its
-         * end will be distorted, so rely only on the alignment start.
-         * @param read The read to position-check.
-         * @return True if the read starts after the current bounds.  False otherwise.
-         */
-        private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
-            return
-                    // Read ends on a later contig, or...
-                    read.getReferenceIndex() > intervalContigIndices[currentBound] ||
-                            // Read ends of this contig...
-                            (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
-                                    // either after this location, or...
-                                    (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
-                                            // read is unmapped but positioned and alignment start is on or after this start point.
-                                            (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
-        }
-
-        /**
-         * Check whether the read lies before the end of the current bound.
-         * @param read The read to position-check.
-         * @return True if the read starts after the current bounds.  False otherwise.
-         */
-        private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
-            return
-                    // Read starts on a prior contig, or...
-                    read.getReferenceIndex() < intervalContigIndices[currentBound] ||
-                            // Read starts on this contig and the alignment start is registered before this end point.
-                            (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
-        }
-    }
-
     /**
      * Locates the index file alongside the given BAM, if present.
      * TODO: This is currently a hachetjob that reaches into Picard and pulls out its index file locator.  Replace with something more permanent.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
index d1f5d80da..da11d36dd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
@@ -17,9 +17,21 @@ import java.util.List;
 import java.util.NoSuchElementException;
 
 /**
- * Buffer shards of data which may or may not contain multiple loci into
- * iterators of all data which cover an interval.  Its existence is an homage
- * to Mark's stillborn WindowMaker, RIP 2009.
+ * Transforms an iterator of reads which overlap the given interval list into an iterator of covered single-base loci
+ * completely contained within the interval list.  To do this, it creates a LocusIteratorByState which will emit a single-bp
+ * locus for every base covered by the read iterator, then uses the WindowMakerIterator.advance() to filter down that stream of
+ * loci to only those covered by the given interval list.
+ *
+ * Example:
+ * Incoming stream of reads: A:chr20:1-5, B:chr20:2-6, C:chr20:2-7, D:chr20:3-8, E:chr20:5-10
+ * Incoming intervals: chr20:3-7
+ *
+ * Locus iterator by state will produce the following stream of data:
+ *  chr1:1 {A}, chr1:2 {A,B,C}, chr1:3 {A,B,C,D}, chr1:4 {A,B,C,D}, chr1:5 {A,B,C,D,E},
+ *  chr1:6 {B,C,D,E}, chr1:7 {C,D,E}, chr1:8 {D,E}, chr1:9 {E}, chr1:10 {E}
+ *
+ * WindowMakerIterator will then filter the incoming stream, emitting the following stream:
+ *  chr1:3 {A,B,C,D}, chr1:4 {A,B,C,D}, chr1:5 {A,B,C,D,E}, chr1:6 {B,C,D,E}, chr1:7 {C,D,E}
  *
  * @author mhanna
  * @version 0.1
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
index d99e7c353..1d14a7f35 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
@@ -102,7 +102,9 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
     }
 
     /**
-     * Gets the best view of loci for this walker given the available data.
+     * Gets the best view of loci for this walker given the available data.  The view will function as a 'trigger track'
+     * of sorts, providing a consistent interface so that TraverseLoci doesn't need to be reimplemented for any new datatype
+     * that comes along.
      * @param walker walker to interrogate.
      * @param dataProvider Data which which to drive the locus view.
      * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.

From 0f728a0604c7c531b0419361673e8791b403f48b Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 9 Feb 2012 14:02:34 -0500
Subject: [PATCH 38/67] The Exact model now subsets the VC to the first N
 alleles when the VC contains more than the maximum number of alleles (instead
 of throwing it out completely as it did previously).  [Perhaps the culling
 should be done by the UG engine?  But theoretically the Exact model can be
 called outside of the UG and we'd still want the context subsetted.]

---
 .../AlleleFrequencyCalculationModel.java      |  18 ++-
 .../genotyper/ExactAFCalculationModel.java    |  26 +++-
 .../genotyper/UnifiedGenotyperEngine.java     | 147 +++++++++---------
 3 files changed, 106 insertions(+), 85 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
index 681cc1fa6..9f2403bbf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
 
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 import java.io.PrintStream;
 import java.util.List;
@@ -41,10 +41,11 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
 
     public enum Model {
         /** The default model with the best performance in all cases */
-        EXACT,
+        EXACT
     }
 
     protected int N;
+    protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
 
     protected Logger logger;
     protected PrintStream verboseWriter;
@@ -53,20 +54,21 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
 
     protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
 
-    protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
+    protected AlleleFrequencyCalculationModel(final UnifiedArgumentCollection UAC, final int N, final Logger logger, final PrintStream verboseWriter) {
         this.N = N;
+        this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = UAC.MAX_ALTERNATE_ALLELES;
         this.logger = logger;
         this.verboseWriter = verboseWriter;
     }
 
     /**
      * Must be overridden by concrete subclasses
-     * @param GLs                               genotype likelihoods
-     * @param Alleles                           Alleles corresponding to GLs
+     * @param vc                                variant context with alleles and genotype likelihoods
      * @param log10AlleleFrequencyPriors        priors
      * @param result                            (pre-allocated) object to store likelihoods results
+     * @return the alleles used for genotyping
      */
-    protected abstract void getLog10PNonRef(GenotypesContext GLs,  List<Allele> Alleles,
-                                            double[][] log10AlleleFrequencyPriors,
-                                            AlleleFrequencyCalculationResult result);
+    protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
+                                                    final double[][] log10AlleleFrequencyPriors,
+                                                    final AlleleFrequencyCalculationResult result);
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
index d604e8d62..f9518a35c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@@ -43,14 +43,28 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
         super(UAC, N, logger, verboseWriter);
     }
 
-    public void getLog10PNonRef(final GenotypesContext GLs,
-                                final List<Allele> alleles,
-                                final double[][] log10AlleleFrequencyPriors,
-                                final AlleleFrequencyCalculationResult result) {
-        final int numAlleles = alleles.size();
+    public List<Allele> getLog10PNonRef(final VariantContext vc,
+                                        final double[][] log10AlleleFrequencyPriors,
+                                        final AlleleFrequencyCalculationResult result) {
+
+        final GenotypesContext GLs = vc.getGenotypes();
+        List<Allele> alleles = vc.getAlleles();
+
+        // don't try to genotype too many alternate alleles
+        if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
+            logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
+
+            alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
+            alleles.add(vc.getReference());
+            for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
+                alleles.add(vc.getAlternateAllele(i));
+            UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
+        }
 
         //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
-        linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false);
+        linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result, false);
+
+        return alleles;
     }
 
     private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 2eba6d884..aa5776007 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -295,12 +295,6 @@ public class UnifiedGenotyperEngine {
         }
         AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
 
-        // don't try to genotype too many alternate alleles
-        if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
-            logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
-            return null;
-        }
-
         // estimate our confidence in a reference call and return
         if ( vc.getNSamples() == 0 ) {
             if ( limitedContext )
@@ -313,25 +307,32 @@ public class UnifiedGenotyperEngine {
         // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
         clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
         clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
-        afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
+        List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
 
         // is the most likely frequency conformation AC=0 for all alternate alleles?
         boolean bestGuessIsRef = true;
 
         // determine which alternate alleles have AF>0
-        boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
+        final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
+        myAlleles.add(vc.getReference());
         for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
-            int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]);
+            final Allele alternateAllele = vc.getAlternateAllele(i);
+            final int indexOfAllele = allelesUsedInGenotyping.indexOf(alternateAllele);
+            // the genotyping model may have stripped it out
+            if ( indexOfAllele == -1 )
+                continue;
+                
+            int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1]);
 
             // if the most likely AC is not 0, then this is a good alternate allele to use;
             // make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array
-            if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
-                altAllelesToUse[i] = true;
+            if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
+                myAlleles.add(alternateAllele);
                 bestGuessIsRef = false;
             }
             // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
             else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
-                altAllelesToUse[i] = true;
+                myAlleles.add(alternateAllele);
             }
         }
 
@@ -367,20 +368,6 @@ public class UnifiedGenotyperEngine {
             return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
         }
 
-        // strip out any alleles that aren't going to be used in the VariantContext
-        final List<Allele> myAlleles;
-        if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
-            myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
-            myAlleles.add(vc.getReference());
-            for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
-                if ( altAllelesToUse[i] )
-                    myAlleles.add(vc.getAlternateAllele(i));
-            }
-        } else {
-            // use all of the alleles if we are given them by the user
-            myAlleles = vc.getAlleles();
-        }
-
         // start constructing the resulting VC
         final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
         final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
@@ -394,7 +381,7 @@ public class UnifiedGenotyperEngine {
         }
 
         // create the genotypes
-        final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
+        final GenotypesContext genotypes = subsetAlleles(vc, myAlleles, true);
 
         // print out stats if we have a writer
         if ( verboseWriter != null && !limitedContext )
@@ -414,7 +401,7 @@ public class UnifiedGenotyperEngine {
             VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
             clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
             clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
-            afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
+            afcm.get().getLog10PNonRef(vcOverall, getAlleleFrequencyPriors(model), AFresult);
             //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
             double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
             //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
@@ -423,7 +410,7 @@ public class UnifiedGenotyperEngine {
             VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
             clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
             clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
-            afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
+            afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
             //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
             double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
             double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
@@ -433,7 +420,7 @@ public class UnifiedGenotyperEngine {
             VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
             clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
             clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
-            afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
+            afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
             //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
             double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
             double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
@@ -772,30 +759,36 @@ public class UnifiedGenotyperEngine {
 
     /**
      * @param vc            variant context with genotype likelihoods
-     * @param allelesToUse  bit vector describing which alternate alleles from the vc are okay to use
      * @return genotypes
      */
-    public static GenotypesContext assignGenotypes(final VariantContext vc,
-                                                   final boolean[] allelesToUse) {
+    public static GenotypesContext assignGenotypes(final VariantContext vc) {
+        return subsetAlleles(vc, vc.getAlleles(), true);
+    }
 
-        // the no-called genotypes
-        final GenotypesContext GLs = vc.getGenotypes();
+    /**
+     * @param vc                 variant context with genotype likelihoods
+     * @param allelesToUse       which alleles from the vc are okay to use
+     * @param assignGenotypes    true if we should change the genotypes based on the (subsetted) PLs
+     * @return genotypes
+     */
+    public static GenotypesContext subsetAlleles(final VariantContext vc,
+                                                 final List<Allele> allelesToUse,
+                                                 final boolean assignGenotypes) {
+
+        // the genotypes with PLs
+        final GenotypesContext oldGTs = vc.getGenotypes();
 
         // samples
-        final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
+        final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
 
-        // the new called genotypes to create
-        final GenotypesContext calls = GenotypesContext.create();
+        // the new genotypes to create
+        final GenotypesContext newGTs = GenotypesContext.create();
 
         // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
-        final int numOriginalAltAlleles = allelesToUse.length;
-        final List<Allele> newAlleles = new ArrayList<Allele>(numOriginalAltAlleles+1);
-        newAlleles.add(vc.getReference());
-        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
-            if ( allelesToUse[i] )
-                newAlleles.add(vc.getAlternateAllele(i));
-        }
-        final int numNewAltAlleles = newAlleles.size() - 1;
+        final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
+        final int numNewAltAlleles = allelesToUse.size() - 1;
+
+        // which PLs should be carried forward?
         ArrayList<Integer> likelihoodIndexesToUse = null;
 
         // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
@@ -804,20 +797,27 @@ public class UnifiedGenotyperEngine {
             likelihoodIndexesToUse = new ArrayList<Integer>(30);
             final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
 
+            final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
+            for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
+                if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
+                    altAlleleIndexToUse[i] = true;
+            }
+
             for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
-                int[] alleles = PLcache[PLindex];
+                final int[] alleles = PLcache[PLindex];
                 // consider this entry only if both of the alleles are good
-                if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
+                if ( (alleles[0] == 0 || altAlleleIndexToUse[alleles[0] - 1]) && (alleles[1] == 0 || altAlleleIndexToUse[alleles[1] - 1]) )
                     likelihoodIndexesToUse.add(PLindex);
             }
         }
 
         // create the new genotypes
-        for ( int k = GLs.size() - 1; k >= 0; k-- ) {
-            final String sample = sampleIndices.get(k);
-            final Genotype g = GLs.get(sample);
-            if ( !g.hasLikelihoods() )
+        for ( int k = 0; k < oldGTs.size(); k++ ) {
+            final Genotype g = oldGTs.get(sampleIndices.get(k));
+            if ( !g.hasLikelihoods() ) {
+                newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
                 continue;
+            }
 
             // create the new likelihoods array from the alleles we are allowed to use
             final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
@@ -834,29 +834,34 @@ public class UnifiedGenotyperEngine {
                 newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
             }
 
-            // if there is no mass on the (new) likelihoods and we actually have alternate alleles, then just no-call the sample
-            if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
-                calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
+            // if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample
+            if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
+                newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
                 continue;
             }
 
-            // find the genotype with maximum likelihoods
-            int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
-            int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
-
-            ArrayList<Allele> myAlleles = new ArrayList<Allele>();
-            myAlleles.add(newAlleles.get(alleles[0]));
-            myAlleles.add(newAlleles.get(alleles[1]));
-
-            final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
-            Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
-            if ( numNewAltAlleles == 0 )
-                attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
-            else
-                attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
-            calls.add(new Genotype(sample, myAlleles, qual, null, attrs, false));
+            final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles);
+            newGTs.add(newGT);
         }
+        
+        return newGTs;
+    }
+     
+    protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles) {
+        // find the genotype with maximum likelihoods
+        int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
+        int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
 
-        return calls;
+        ArrayList<Allele> myAlleles = new ArrayList<Allele>();
+        myAlleles.add(allelesToUse.get(alleles[0]));
+        myAlleles.add(allelesToUse.get(alleles[1]));
+
+        final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
+        Map<String, Object> attrs = new HashMap<String, Object>(originalGT.getAttributes());
+        if ( numNewAltAlleles == 0 )
+            attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
+        else
+            attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
+        return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
     }
 }

From 7a937dd1ebc5186f864bde7736fe00a8f2325f88 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 9 Feb 2012 16:14:22 -0500
Subject: [PATCH 39/67] Several bug fixes to new genotyping strategy.  Update
 integration tests for multi-allelic indels accordingly.

---
 .../genotyper/ExactAFCalculationModel.java    |  4 +--
 .../genotyper/UnifiedGenotyperEngine.java     | 30 ++++++++++++-------
 .../UnifiedGenotyperIntegrationTest.java      |  4 +--
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
index f9518a35c..d833e9f8e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@@ -47,7 +47,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                         final double[][] log10AlleleFrequencyPriors,
                                         final AlleleFrequencyCalculationResult result) {
 
-        final GenotypesContext GLs = vc.getGenotypes();
+        GenotypesContext GLs = vc.getGenotypes();
         List<Allele> alleles = vc.getAlleles();
 
         // don't try to genotype too many alternate alleles
@@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
             alleles.add(vc.getReference());
             for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
                 alleles.add(vc.getAlternateAllele(i));
-            UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
+            GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
         }
 
         //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index aa5776007..c84c944b8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -795,6 +795,10 @@ public class UnifiedGenotyperEngine {
         // then we can keep the PLs as is; otherwise, we determine which ones to keep
         if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
             likelihoodIndexesToUse = new ArrayList<Integer>(30);
+
+            // make sure that we've cached enough data
+            if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 )
+                calculatePLcache(numOriginalAltAlleles);
             final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
 
             final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
@@ -834,20 +838,29 @@ public class UnifiedGenotyperEngine {
                 newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
             }
 
-            // if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample
-            if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
+            // if there is no mass on the (new) likelihoods, then just no-call the sample
+            if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
                 newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
-                continue;
             }
+            else {
+                Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
+                if ( numNewAltAlleles == 0 )
+                    attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
+                else
+                    attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
 
-            final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles);
-            newGTs.add(newGT);
+                // if we weren't asked to assign a genotype, then just no-call the sample
+                if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
+                    newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
+                else
+                    newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles, attrs));
+            }
         }
         
         return newGTs;
     }
      
-    protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles) {
+    protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles, Map<String, Object> attrs) {
         // find the genotype with maximum likelihoods
         int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
         int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
@@ -857,11 +870,6 @@ public class UnifiedGenotyperEngine {
         myAlleles.add(allelesToUse.get(alleles[1]));
 
         final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
-        Map<String, Object> attrs = new HashMap<String, Object>(originalGT.getAttributes());
-        if ( numNewAltAlleles == 0 )
-            attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
-        else
-            attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
         return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index fd6738123..125242a2f 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
-                Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
+                Arrays.asList("e4d2904b406f37d99fbe8f52ae75254f"));
         executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
     }
 
@@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
                         "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
-                Arrays.asList("1d1956fd7b0f0d30935674b2f5019860"));
+                Arrays.asList("21f7b6c8b7eaccad1754a832bac79a65"));
         executeTest("test MultiSample Phase1 indels with complicated records", spec4);
     }
 

From 5af373a3a13a3558d758fb280e8cdced5c4bc949 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 8 Feb 2012 14:39:55 -0500
Subject: [PATCH 40/67] BQSR with indels integrated!

   * added support to base before deletion in the pileup
   * refactored covariates to operate on mismatches, insertions and deletions at the same time
   * all code is in private so original BQSR is still working as usual in public
   * outputs a molten CSV with mismatches, insertions and deletions, time to play!
   * barely tested, passes my very simple tests... haven't tested edge cases.
---
 .../sting/gatk/iterators/LocusIteratorByState.java   |  4 ++--
 .../SNPGenotypeLikelihoodsCalculationModel.java      |  7 +++++--
 .../org/broadinstitute/sting/utils/NGSPlatform.java  | 12 +++++++++++-
 .../sting/utils/pileup/AbstractReadBackedPileup.java |  6 +++---
 .../utils/pileup/ExtendedEventPileupElement.java     |  2 +-
 .../sting/utils/pileup/PileupElement.java            |  9 ++++++++-
 .../pileup/ReadBackedExtendedEventPileupImpl.java    |  2 +-
 .../sting/utils/pileup/ReadBackedPileupImpl.java     |  4 ++--
 .../sting/utils/sam/ArtificialSAMUtils.java          |  4 ++--
 .../sting/utils/sam/GATKSAMRecordUnitTest.java       |  4 ++--
 10 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
index 316a20a70..6edae3816 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@@ -470,7 +470,7 @@ public class LocusIteratorByState extends LocusIterator {
 
                         if (op == CigarOperator.D) {
                             if (readInfo.includeReadsWithDeletionAtLoci()) {          // only add deletions to the pileup if we are authorized to do so
-                                pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart())));
+                                pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart())));
                                 size++;
                                 nDeletions++;
                                 if (read.getMappingQuality() == 0)
@@ -479,7 +479,7 @@ public class LocusIteratorByState extends LocusIterator {
                         }
                         else {
                             if (!filterBaseInRead(read, location.getStart())) {
-                                pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart())));
+                                pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart())));
                                 size++;
                                 if (read.getMappingQuality() == 0)
                                     nMQ0Reads++;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index ea53c815d..6171b01eb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -39,7 +39,10 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 import org.broadinstitute.sting.utils.variantcontext.*;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
 
@@ -212,7 +215,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
 
     public class BAQedPileupElement extends PileupElement {
         public BAQedPileupElement( final PileupElement PE ) {
-            super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeInsertion(), PE.isNextToSoftClip());
+            super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletion(), PE.isBeforeInsertion(), PE.isNextToSoftClip());
         }
 
         @Override
diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java
index 4f01f2b7a..597dc4803 100644
--- a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java
+++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java
@@ -87,7 +87,7 @@ public enum NGSPlatform {
     /**
      * Returns the NGSPlatform corresponding to the PL tag in the read group
      * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
-     * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
+     * @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
      */
     public static final NGSPlatform fromReadGroupPL(final String plFromRG) {
         if ( plFromRG == null ) return UNKNOWN;
@@ -105,4 +105,14 @@ public enum NGSPlatform {
 
         return UNKNOWN;
     }
+
+    /**
+     * checks whether or not the requested platform is listed in the set (and is not unknown)
+     *
+     * @param platform the read group string that describes the platform used
+     * @return true if the platform is known (i.e. it's in the list and is not UNKNOWN)
+     */
+    public static final boolean isKnown (final String platform) {
+        return fromReadGroupPL(platform) != UNKNOWN;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index 82e403842..70ad70f43 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
         for (int i = 0; i < reads.size(); i++) {
             GATKSAMRecord read = reads.get(i);
             int offset = offsets.get(i);
-            pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
+            pileup.add(createNewPileupElement(read, offset, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
         }
 
         return pileup;
@@ -196,7 +196,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
 
         UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
         for (GATKSAMRecord read : reads) {
-            pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
+            pileup.add(createNewPileupElement(read, offset, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
         }
 
         return pileup;
@@ -204,7 +204,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
 
     protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
 
-    protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip);
+    protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip);
 
     // --------------------------------------------------------
     //
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java
index 921da2a1f..506442d03 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java
@@ -48,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement {
 
 
     public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
-        super(read, offset, type == Type.DELETION, false, false); // extended events are slated for removal
+        super(read, offset, type == Type.DELETION, false, false, false); // extended events are slated for removal
         this.read = read;
         this.offset = offset;
         this.eventLength = eventLength;
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
index a4830223e..9df22700e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java
@@ -24,6 +24,7 @@ public class PileupElement implements Comparable<PileupElement> {
     protected final GATKSAMRecord read;
     protected final int offset;
     protected final boolean isDeletion;
+    protected final boolean isBeforeDeletion;
     protected final boolean isBeforeInsertion;
     protected final boolean isNextToSoftClip;
 
@@ -33,6 +34,7 @@ public class PileupElement implements Comparable<PileupElement> {
      * @param read              the read we are adding to the pileup
      * @param offset            the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
      * @param isDeletion        whether or not this base is a deletion
+     * @param isBeforeDeletion  whether or not this base is before a deletion
      * @param isBeforeInsertion whether or not this base is before an insertion
      * @param isNextToSoftClip  whether or not this base is next to a soft clipped base
      */
@@ -40,13 +42,14 @@ public class PileupElement implements Comparable<PileupElement> {
             "read != null",
             "offset >= -1",
             "offset <= read.getReadLength()"})
-    public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip) {
+    public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip) {
         if (offset < 0 && isDeletion)
             throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
 
         this.read = read;
         this.offset = offset;
         this.isDeletion = isDeletion;
+        this.isBeforeDeletion = isBeforeDeletion;
         this.isBeforeInsertion = isBeforeInsertion;
         this.isNextToSoftClip = isNextToSoftClip;
     }
@@ -55,6 +58,10 @@ public class PileupElement implements Comparable<PileupElement> {
         return isDeletion;
     }
 
+    public boolean isBeforeDeletion() {
+        return isBeforeDeletion;
+    }
+
     public boolean isBeforeInsertion() {
         return isBeforeInsertion;
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java
index df334f557..357195daa 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java
@@ -96,7 +96,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
     }
 
     @Override
-    protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) {
+    protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) {
         throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
index 20b100001..7a6ebef21 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
@@ -71,7 +71,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
     }
 
     @Override
-    protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) {
-        return new PileupElement(read, offset, isDeletion, isBeforeInsertion, isNextToSoftClip);
+    protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) {
+        return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isBeforeInsertion, isNextToSoftClip);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
index 1175a038f..b17e325fc 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@@ -361,10 +361,10 @@ public class ArtificialSAMUtils {
             final GATKSAMRecord left = pair.get(0);
             final GATKSAMRecord right = pair.get(1);
 
-            pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false));
+            pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false, false));
 
             if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
-                pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false));
+                pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false, false));
             }
         }
 
diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java
index 729503f84..520fb7040 100755
--- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java
@@ -42,8 +42,8 @@ public class GATKSAMRecordUnitTest extends BaseTest {
 
     @Test
     public void testReducedReadPileupElement() {
-        PileupElement readp = new PileupElement(read, 0, false, false, false);
-        PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
+        PileupElement readp = new PileupElement(read, 0, false, false, false, false);
+        PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false, false);
 
         Assert.assertFalse(readp.getRead().isReducedRead());
 

From f53cd3de1b9b92fe9e4048a3201c54512fb5c6ce Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Fri, 10 Feb 2012 11:07:32 -0500
Subject: [PATCH 41/67] Based on Ryan's suggestion, there's a new contract for
 genotyping multiple alleles.  Now the requester submits alleles in any
 arbitrary order - rankings aren't needed.  If the Exact model decides that it
 needs to subset the alleles because too many were requested, it does so based
 on PL mass (in other words, I moved this code from the
 SNPGenotypeLikelihoodsCalculationModel to the Exact model).  Now subsetting
 alleles is consistent.

---
 .../genotyper/ExactAFCalculationModel.java    | 55 ++++++++++++++++++-
 ...NPGenotypeLikelihoodsCalculationModel.java | 33 ++++-------
 .../genotyper/UnifiedGenotyperEngine.java     |  4 +-
 .../UnifiedGenotyperIntegrationTest.java      |  8 +--
 4 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
index d833e9f8e..ed737064d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@@ -56,8 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
 
             alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
             alleles.add(vc.getReference());
-            for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
-                alleles.add(vc.getAlternateAllele(i));
+            alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
             GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
         }
 
@@ -67,6 +66,58 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
         return alleles;
     }
 
+    private static final class LikelihoodSum implements Comparable<LikelihoodSum> {
+        public double sum = 0.0;
+        public Allele allele;
+
+        public LikelihoodSum(Allele allele) { this.allele = allele; }
+
+        public int compareTo(LikelihoodSum other) {
+            final double diff = sum - other.sum;
+            return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
+        }
+    }
+
+    private static final int PL_INDEX_OF_HOM_REF = 0;
+    private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
+        final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
+        final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
+        for ( int i = 0; i < numOriginalAltAlleles; i++ )
+            likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
+
+        // make sure that we've cached enough data
+        if ( numOriginalAltAlleles > UnifiedGenotyperEngine.PLIndexToAlleleIndex.length - 1 )
+            UnifiedGenotyperEngine.calculatePLcache(numOriginalAltAlleles);
+
+        // based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
+        final ArrayList<double[]> GLs = getGLs(vc.getGenotypes());
+        for ( final double[] likelihoods : GLs ) {
+            final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
+            if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
+                int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numOriginalAltAlleles][PLindexOfBestGL];
+                if ( alleles[0] != 0 )
+                    likelihoodSums[alleles[0]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
+                // don't double-count it
+                if ( alleles[1] != 0 && alleles[1] != alleles[0] )
+                    likelihoodSums[alleles[1]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
+            }
+        }
+
+        // sort them by probability mass and choose the best ones
+        Collections.sort(Arrays.asList(likelihoodSums));
+        final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
+        for ( int i = 0; i < numAllelesToChoose; i++ )
+            bestAlleles.add(likelihoodSums[i].allele);
+
+        final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
+        for ( Allele allele : vc.getAlternateAlleles() ) {
+            if ( bestAlleles.contains(allele) )
+                orderedBestAlleles.add(allele);
+        }
+        
+        return orderedBestAlleles;
+    }
+    
     private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
         ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index 6f1f86c6d..c078be2f2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -45,20 +45,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
 
     private final boolean useAlleleFromVCF;
 
-    final LikelihoodSum[] likelihoodSums = new LikelihoodSum[4];
-
-    private final class LikelihoodSum implements Comparable<LikelihoodSum> {
-        public double sum = 0.0;
-        public Allele base;
-
-        public LikelihoodSum(Allele base) { this.base = base; }
-
-        public int compareTo(LikelihoodSum other) {
-            final double diff = sum - other.sum;
-            return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
-        }
-    }
-
+    private final double[] likelihoodSums = new double[4];
+    
     protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
         super(UAC, logger);
         useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
@@ -176,27 +164,26 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
         final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref);
         final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal();
         for ( int i = 0; i < 4; i++ )
-            likelihoodSums[i] = new LikelihoodSum(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
-
-        // based on the GLs, find the alternate alleles with the most probability
+            likelihoodSums[i] = 0.0;
+        
+        // based on the GLs, find the alternate alleles with enough probability
         for ( SampleGenotypeData sampleData : sampleDataList ) {
             final double[] likelihoods = sampleData.GL.getLikelihoods();
             final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
             if ( PLindexOfBestGL != PLindexOfRef ) {
                 int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL];
                 if ( alleles[0] != baseIndexOfRef )
-                    likelihoodSums[alleles[0]].sum += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
+                    likelihoodSums[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
                 // don't double-count it
                 if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] )
-                    likelihoodSums[alleles[1]].sum += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
+                    likelihoodSums[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
             }
         }
 
-        Collections.sort(Arrays.asList(likelihoodSums));
         final List<Allele> allelesToUse = new ArrayList<Allele>(3);
-        for ( LikelihoodSum sum : likelihoodSums ) {
-            if ( sum.sum > 0.0 )
-                allelesToUse.add(sum.base);
+        for ( int i = 0; i < 4; i++ ) {
+            if ( likelihoodSums[i] > 0.0 )
+                allelesToUse.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
         }
 
         return allelesToUse;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index c84c944b8..0156890ac 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -767,7 +767,7 @@ public class UnifiedGenotyperEngine {
 
     /**
      * @param vc                 variant context with genotype likelihoods
-     * @param allelesToUse       which alleles from the vc are okay to use
+     * @param allelesToUse       which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
      * @param assignGenotypes    true if we should change the genotypes based on the (subsetted) PLs
      * @return genotypes
      */
@@ -860,7 +860,7 @@ public class UnifiedGenotyperEngine {
         return newGTs;
     }
      
-    protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles, Map<String, Object> attrs) {
+    protected static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final int numNewAltAlleles, final Map<String, Object> attrs) {
         // find the genotype with maximum likelihoods
         int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
         int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 125242a2f..fc4f0f46b 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultiSamplePilot1() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("9ab4e98ce437a1c5e1eee338de49ee7e"));
+                Arrays.asList("202b337ebbea3def1be8495eb363dfa8"));
         executeTest("test MultiSample Pilot1", spec);
     }
 
@@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
-                Arrays.asList("aabc4b3a312aba18b78e14750d8c8e62"));
+                Arrays.asList("b53cb55a5f868663068812b13578af57"));
         executeTest("test Multiple SNP alleles", spec);
     }
 
@@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
-                Arrays.asList("e4d2904b406f37d99fbe8f52ae75254f"));
+                Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c"));
         executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
     }
 
@@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
                         "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
-                Arrays.asList("21f7b6c8b7eaccad1754a832bac79a65"));
+                Arrays.asList("5282fdb1711a532d726c13507bf80a21"));
         executeTest("test MultiSample Phase1 indels with complicated records", spec4);
     }
 

From 1fb19a0f98001d4ae71f2eb9c775051079bcb2ab Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Fri, 10 Feb 2012 11:43:48 -0500
Subject: [PATCH 44/67] Moving the covariates and shared functionality to
 public

so Ryan can work on the recalibration on the fly without breaking the build. Supposedly all the secret sauce is in the BQSR walker, which sits in private.
---
 .../gatk/walkers/bqsr/ContextCovariate.java   | 101 +++
 .../sting/gatk/walkers/bqsr/Covariate.java    |  62 ++
 .../gatk/walkers/bqsr/CovariateKeySet.java    |  63 ++
 .../gatk/walkers/bqsr/CovariateValues.java    |  37 +
 .../gatk/walkers/bqsr/CycleCovariate.java     | 199 +++++
 .../walkers/bqsr/QualityScoreCovariate.java   |  77 ++
 .../gatk/walkers/bqsr/ReadGroupCovariate.java |  57 ++
 .../gatk/walkers/bqsr/RecalDataManager.java   | 698 ++++++++++++++++++
 .../sting/gatk/walkers/bqsr/RecalDatum.java   | 112 +++
 .../walkers/bqsr/RecalDatumOptimized.java     | 115 +++
 .../bqsr/RecalibrationArgumentCollection.java | 102 +++
 11 files changed, 1623 insertions(+)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
new file mode 100644
index 000000000..a46543f67
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.Arrays;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 9/26/11
+ */
+
+public class ContextCovariate implements StandardCovariate {
+
+    private int mismatchesContextSize;
+    private int insertionsContextSize;
+    private int  deletionsContextSize;
+
+    private String mismatchesNoContext = "";
+    private String insertionsNoContext = "";
+    private String  deletionsNoContext = "";
+    
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
+        insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
+        deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
+
+        if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
+            throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
+
+        // initialize no context strings given the size of the context for each covariate type
+        mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
+        insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
+        deletionsNoContext  = makeAllNStringWithLength( deletionsContextSize);        
+    }
+
+    @Override
+    public CovariateValues getValues(final GATKSAMRecord read) {
+        int l = read.getReadLength();
+        String[] mismatches = new String [l];
+        String[] insertions = new String [l];
+        String[]  deletions = new String [l];
+        
+        byte[] bases = read.getReadBases();
+        for (int i = 0; i < read.getReadLength(); i++) {
+            mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
+            insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
+            deletions[i]  = contextWith(bases, i,  deletionsContextSize,  deletionsNoContext);
+        }
+        return new CovariateValues(mismatches, insertions, deletions);
+    }
+
+    /**
+     * calculates the context of a base indenpendent of the covariate mode
+     *
+     * @param bases           the bases in the read to build the context from
+     * @param offset          the position in the read to calculate the context for
+     * @param contextSize     context size to use building the context
+     * @param noContextString string to return if the position is not far enough in the read to have a full context before.
+     * @return
+     */
+    private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) {
+        return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
+    } 
+    
+    private String makeAllNStringWithLength(int length) {
+        String s = "";
+        for (int i=0; i<length; i++)
+            s += "N";
+        return s;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
new file mode 100755
index 000000000..d1726dd13
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
@@ -0,0 +1,62 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Oct 30, 2009
+ *
+ * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read.
+ * In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code.
+ * This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up.
+ */
+
+public interface Covariate {
+    /**
+     * Initialize any member variables using the command-line arguments passed to the walker
+     *
+     * @param RAC the recalibration argument collection
+     */
+    public void initialize(RecalibrationArgumentCollection RAC);
+
+    /**
+     * Calculates covariate values for all positions in the read.
+     *
+     * @param read the read to calculate the covariates on.
+     * @return all the covariate values for every base in the read.
+     */
+    public CovariateValues getValues(GATKSAMRecord read);
+
+}
+
+interface RequiredCovariate extends Covariate {}
+
+interface StandardCovariate extends Covariate {}
+
+interface ExperimentalCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
new file mode 100644
index 000000000..04a0684b6
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
@@ -0,0 +1,63 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+/**
+ * The object temporarily held by a read that describes all of it's covariates. 
+ * 
+ * In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
+ *
+ * @author Mauricio Carneiro
+ * @since 2/8/12
+ */
+public class CovariateKeySet {
+    private Object[][] mismatchesKeySet;
+    private Object[][] insertionsKeySet;
+    private Object[][]  deletionsKeySet;
+
+    private int nextCovariateIndex;
+    
+    private final String mismatchesCovariateName = "M";
+    private final String insertionsCovariateName = "I";
+    private final String  deletionsCovariateName = "D";
+
+    public CovariateKeySet(int readLength, int numberOfCovariates) {
+        numberOfCovariates++;                                               // +1 because we are adding the mismatch covariate (to comply with the molten table format)
+        this.mismatchesKeySet = new Object[readLength][numberOfCovariates]; 
+        this.insertionsKeySet = new Object[readLength][numberOfCovariates];
+        this.deletionsKeySet  = new Object[readLength][numberOfCovariates];
+        initializeCovariateKeySet(this.mismatchesKeySet, this.mismatchesCovariateName);
+        initializeCovariateKeySet(this.insertionsKeySet, this.insertionsCovariateName);
+        initializeCovariateKeySet(this.deletionsKeySet,  this.deletionsCovariateName);
+        this.nextCovariateIndex = 0;
+    }
+    
+    public void addCovariate(CovariateValues covariate) {
+        transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
+        transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
+        transposeCovariateValues(deletionsKeySet,  covariate.getDeletions());
+        nextCovariateIndex++;
+    }
+    
+    public Object[] getMismatchesKeySet(int readPosition) {
+        return mismatchesKeySet[readPosition];
+    }
+
+    public Object[] getInsertionsKeySet(int readPosition) {
+        return insertionsKeySet[readPosition];
+    }
+
+    public Object[] getDeletionsKeySet(int readPosition) {
+        return deletionsKeySet[readPosition];
+    }
+
+    private void transposeCovariateValues (Object [][] keySet, Object [] covariateValues) {
+        for (int i=0; i<covariateValues.length; i++) 
+            keySet[i][nextCovariateIndex] = covariateValues[i];        
+    }
+    
+    private void initializeCovariateKeySet (Object[][] keySet, String covariateName) {
+        int readLength = keySet.length;
+        int lastCovariateIndex = keySet[0].length - 1;
+        for (int i = 0; i < readLength; i++) 
+            keySet[i][lastCovariateIndex] = covariateName;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
new file mode 100644
index 000000000..dd3a44fb5
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java
@@ -0,0 +1,37 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+/**
+ * An object to hold the different covariate values for all bases in the read.
+ *
+ * Currently we have three different covariates for each read:
+ *   - Mismatch
+ *   - Insertion
+ *   - Deletion
+ *
+ * @author Mauricio Carneiro
+ * @since 2/8/12
+ */
+public class CovariateValues {
+    private Comparable[] mismatches;
+    private Comparable[] insertions;
+    private Comparable[] deletions;
+
+    public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) {
+        this.mismatches = mismatch;
+        this.insertions = insertion;
+        this.deletions = deletion;
+    }
+
+    public Comparable[] getMismatches() {
+        return mismatches;
+    }
+
+    public Comparable[] getInsertions() {
+        return insertions;
+    }
+
+    public Comparable[] getDeletions() {
+        return deletions;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
new file mode 100755
index 000000000..f996de50e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
@@ -0,0 +1,199 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.NGSPlatform;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.EnumSet;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Oct 30, 2009
+ *
+ * The Cycle covariate.
+ * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read)
+ * For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle
+ * For example, for the read: AAACCCCGAAATTTTTACTG
+ * the cycle would be 11111111222333333344
+ * For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round
+ */
+
+public class CycleCovariate implements StandardCovariate {
+    private final static EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
+    private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
+
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
+            throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
+    }
+
+    // Used to pick out the covariate's value from attributes of the read
+    @Override
+    public CovariateValues getValues(final GATKSAMRecord read) {
+        Integer [] cycles = new Integer[read.getReadLength()];
+        final NGSPlatform ngsPlatform = read.getNGSPlatform();
+
+        // Discrete cycle platforms
+        if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
+            final int init;
+            final int increment;
+            if (!read.getReadNegativeStrandFlag()) {
+                // Differentiate between first and second of pair.
+                // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
+                // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
+                // Therefore the cycle covariate must differentiate between first and second of pair reads.
+                // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
+                //   the current sequential model would consider the effects independently instead of jointly.
+                if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
+                    //second of pair, positive strand
+                    init = -1;
+                    increment = -1;
+                }
+                else {
+                    //first of pair, positive strand
+                    init = 1;
+                    increment = 1;
+                }
+
+            }
+            else {
+                if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
+                    //second of pair, negative strand
+                    init = -read.getReadLength();
+                    increment = 1;
+                }
+                else {
+                    //first of pair, negative strand
+                    init = read.getReadLength();
+                    increment = -1;
+                }
+            }
+
+            int cycle = init;
+            for (int i = 0; i < read.getReadLength(); i++) {
+                cycles[i] = cycle;
+                cycle += increment;
+            }
+        }
+
+        // Flow cycle platforms
+        else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) {
+
+            final int readLength = read.getReadLength();
+            final byte[] bases = read.getReadBases();
+
+            // Differentiate between first and second of pair.
+            // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
+            // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
+            // Therefore the cycle covariate must differentiate between first and second of pair reads.
+            // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
+            //   the current sequential model would consider the effects independently instead of jointly.
+            final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
+
+            int cycle = multiplyByNegative1 ? -1 : 1;
+
+            // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
+            // For example, AAAAAAA was probably read in two flow cycles but here we count it as one
+            if (!read.getReadNegativeStrandFlag()) { // Forward direction
+                int iii = 0;
+                while (iii < readLength) {
+                    while (iii < readLength && bases[iii] == (byte) 'T') {
+                        cycles[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'A') {
+                        cycles[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'C') {
+                        cycles[iii] = cycle;
+                        iii++;
+                    }
+                    while (iii < readLength && bases[iii] == (byte) 'G') {
+                        cycles[iii] = cycle;
+                        iii++;
+                    }
+                    if (iii < readLength) {
+                        if (multiplyByNegative1)
+                            cycle--;
+                        else
+                            cycle++;
+                    }
+                    if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
+                        cycles[iii] = cycle;
+                        iii++;
+                    }
+
+                }
+            }
+            else { // Negative direction
+                int iii = readLength - 1;
+                while (iii >= 0) {
+                    while (iii >= 0 && bases[iii] == (byte) 'T') {
+                        cycles[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'A') {
+                        cycles[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'C') {
+                        cycles[iii] = cycle;
+                        iii--;
+                    }
+                    while (iii >= 0 && bases[iii] == (byte) 'G') {
+                        cycles[iii] = cycle;
+                        iii--;
+                    }
+                    if (iii >= 0) {
+                        if (multiplyByNegative1)
+                            cycle--;
+                        else
+                            cycle++;
+                    }
+                    if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
+                        cycles[iii] = cycle;
+                        iii--;
+                    }
+                }
+            }
+        }
+
+        // Unknown platforms
+        else {
+            throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
+        }
+        
+        return new CovariateValues(cycles, cycles, cycles);
+    }
+
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
new file mode 100755
index 000000000..0d36f3ff4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.Arrays;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 3, 2009
+ *
+ * The Reported Quality Score covariate.
+ */
+
+public class QualityScoreCovariate implements RequiredCovariate {
+
+    private byte defaultMismatchesQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
+    private byte defaultInsertionsQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
+    private byte  defaultDeletionsQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
+        
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+        defaultMismatchesQuality = RAC.MISMATCHES_DEFAULT_QUALITY;
+        defaultInsertionsQuality = RAC.INSERTIONS_DEFAULT_QUALITY;
+         defaultDeletionsQuality = RAC.DELETIONS_DEFAULT_QUALITY; 
+    }
+
+    @Override
+    public CovariateValues getValues(final GATKSAMRecord read) {
+        int readLength = read.getReadLength();
+        
+        Byte [] mismatches = new Byte[readLength];
+        Byte [] insertions = new Byte[readLength];
+        Byte []  deletions = new Byte[readLength];
+        
+        byte [] baseQualities = read.getBaseQualities();
+
+        if (defaultMismatchesQuality >= 0)
+            Arrays.fill(mismatches, defaultMismatchesQuality);                  // if the user decides to override the base qualities in the read, use the flat value
+        else {
+            for (int i=0; i<baseQualities.length; i++)
+                mismatches[i] = baseQualities[i];
+        }
+
+        Arrays.fill(insertions, defaultInsertionsQuality);                      // Some day in the future when base insertion and base deletion quals exist the samtools API will
+        Arrays.fill( deletions, defaultDeletionsQuality);                       // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat value (parameter)
+
+        return new CovariateValues(mismatches, insertions, deletions);
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
new file mode 100755
index 000000000..c7a5700e8
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
@@ -0,0 +1,57 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.Arrays;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Oct 30, 2009
+ *
+ * The Read Group covariate.
+ */
+
+public class ReadGroupCovariate implements RequiredCovariate {
+
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {
+    }
+
+    @Override
+    public CovariateValues getValues(final GATKSAMRecord read) {
+        final int l = read.getReadLength();
+        final String readGroupId = read.getReadGroup().getReadGroupId();
+        String [] readGroups = new String[l];
+        Arrays.fill(readGroups, readGroupId);
+        return new CovariateValues(readGroups, readGroups, readGroups);
+    }
+}
+
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
new file mode 100644
index 000000000..6e6227981
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@@ -0,0 +1,698 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import net.sf.samtools.SAMUtils;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.collections.NestedHashMap;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.AlignmentUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 6, 2009
+ *
+ * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
+ * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias.
+ * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
+ */
+
+public class RecalDataManager {
+    public final NestedHashMap nestedHashMap;                           // The full dataset
+    private final NestedHashMap dataCollapsedReadGroup;                 // Table where everything except read group has been collapsed
+    private final NestedHashMap dataCollapsedQualityScore;              // Table where everything except read group and quality score has been collapsed
+    private final ArrayList<NestedHashMap> dataCollapsedByCovariate;    // Tables where everything except read group, quality score, and given covariate has been collapsed
+
+    public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";      // The tag that holds the original quality scores
+    public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ";   // The tag that holds the color space quality scores for SOLID bams
+    public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS";        // The tag that holds the color space for SOLID bams
+    public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC";    // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
+    private static boolean warnUserNullReadGroup = false;
+    private static boolean warnUserNullPlatform = false;
+
+    private static final String COVARS_ATTRIBUTE = "COVARS";                   // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
+
+
+    public enum SOLID_RECAL_MODE {
+        /**
+         * Treat reference inserted bases as reference matching bases. Very unsafe!
+         */
+        DO_NOTHING,
+        /**
+         * Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option.
+         */
+        SET_Q_ZERO,
+        /**
+         * In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV.
+         */
+        SET_Q_ZERO_BASE_N,
+        /**
+         * Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference.
+         */
+        REMOVE_REF_BIAS
+    }
+
+    public enum SOLID_NOCALL_STRATEGY {
+        /**
+         * When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option.
+         */
+        THROW_EXCEPTION,
+        /**
+         * Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare.
+         */
+        LEAVE_READ_UNRECALIBRATED,
+        /**
+         * Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses.
+         */
+        PURGE_READ
+    }
+
+    public RecalDataManager() {
+        nestedHashMap = new NestedHashMap();
+        dataCollapsedReadGroup = null;
+        dataCollapsedQualityScore = null;
+        dataCollapsedByCovariate = null;
+    }
+
+    public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
+        if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
+            nestedHashMap = null;
+            dataCollapsedReadGroup = new NestedHashMap();
+            dataCollapsedQualityScore = new NestedHashMap();
+            dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
+            for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
+                dataCollapsedByCovariate.add(new NestedHashMap());
+            }
+        }
+        else {
+            nestedHashMap = new NestedHashMap();
+            dataCollapsedReadGroup = null;
+            dataCollapsedQualityScore = null;
+            dataCollapsedByCovariate = null;
+        }
+    }
+
+    public static CovariateKeySet getAllCovariateValuesFor(GATKSAMRecord read) {
+        return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
+    }
+    
+    /**
+     * Add the given mapping to all of the collapsed hash tables
+     *
+     * @param key                        The list of comparables that is the key for this mapping
+     * @param fullDatum                  The RecalDatum which is the data for this mapping
+     * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
+     */
+    public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) {
+
+        // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
+        //data.put(key, thisDatum); // add the mapping to the main table
+
+        final int qualityScore = Integer.parseInt(key[1].toString());
+        final Object[] readGroupCollapsedKey = new Object[1];
+        final Object[] qualityScoreCollapsedKey = new Object[2];
+        final Object[] covariateCollapsedKey = new Object[3];
+        RecalDatum collapsedDatum;
+
+        // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
+        if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
+            readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
+            collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
+            if (collapsedDatum == null) {
+                dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
+            }
+            else {
+                collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
+            }
+        }
+
+        // Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
+        qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
+        qualityScoreCollapsedKey[1] = key[1]; //                                    and quality score
+        collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
+        if (collapsedDatum == null) {
+            dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
+        }
+        else {
+            collapsedDatum.increment(fullDatum);
+        }
+
+        // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
+        for (int iii = 0; iii < dataCollapsedByCovariate.size(); iii++) {
+            covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
+            covariateCollapsedKey[1] = key[1]; //                                    and quality score ...
+            final Object theCovariateElement = key[iii + 2]; //                                        and the given covariate
+            if (theCovariateElement != null) {
+                covariateCollapsedKey[2] = theCovariateElement;
+                collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
+                if (collapsedDatum == null) {
+                    dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
+                }
+                else {
+                    collapsedDatum.increment(fullDatum);
+                }
+            }
+        }
+    }
+
+    /**
+     * Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
+     * that will be used in the sequential calculation in TableRecalibrationWalker
+     *
+     * @param smoothing The smoothing parameter that goes into empirical quality score calculation
+     * @param maxQual   At which value to cap the quality scores
+     */
+    public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
+
+        recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual);
+        recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
+        for (NestedHashMap map : dataCollapsedByCovariate) {
+            recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
+            checkForSingletons(map.data);
+        }
+    }
+
+    private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) {
+
+        for (Object comp : data.keySet()) {
+            final Object val = data.get(comp);
+            if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
+                ((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual);
+            }
+            else { // Another layer in the nested hash map
+                recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual);
+            }
+        }
+    }
+
+    private void checkForSingletons(final Map data) {
+        // todo -- this looks like it's better just as a data.valueSet() call?
+        for (Object comp : data.keySet()) {
+            final Object val = data.get(comp);
+            if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
+                if (data.keySet().size() == 1) {
+                    data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
+                    // in a previous step of the sequential calculation model
+                }
+            }
+            else { // Another layer in the nested hash map
+                checkForSingletons((Map) val);
+            }
+        }
+    }
+
+    /**
+     * Get the appropriate collapsed table out of the set of all the tables held by this Object
+     *
+     * @param covariate Which covariate indexes the desired collapsed HashMap
+     * @return The desired collapsed HashMap
+     */
+    public final NestedHashMap getCollapsedTable(final int covariate) {
+        if (covariate == 0) {
+            return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
+        }
+        else if (covariate == 1) {
+            return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
+        }
+        else {
+            return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
+        }
+    }
+
+    /**
+     * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
+     *
+     * @param read The read to adjust
+     * @param RAC  The list of shared command line arguments
+     */
+    public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
+        GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
+
+        if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
+            readGroup.setPlatform(RAC.FORCE_PLATFORM);
+        }
+
+        if (readGroup.getPlatform() == null) {
+            if (RAC.DEFAULT_PLATFORM != null) {
+                if (!warnUserNullPlatform) {
+                    Utils.warnUser("The input .bam file contains reads with no platform information. " +
+                            "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
+                            "First observed at read with name = " + read.getReadName());
+                    warnUserNullPlatform = true;
+                }
+                readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
+            }
+            else {
+                throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName());
+            }
+        }
+    }
+
+    /**
+     * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
+     *
+     * @param read The SAMRecord to parse
+     */
+    public static void parseColorSpace(final GATKSAMRecord read) {
+
+        // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
+        if (ReadUtils.isSOLiDRead(read)) {
+            if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
+                final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
+                if (attr != null) {
+                    byte[] colorSpace;
+                    if (attr instanceof String) {
+                        colorSpace = ((String) attr).getBytes();
+                    }
+                    else {
+                        throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
+                    }
+
+                    // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
+                    byte[] readBases = read.getReadBases();
+                    if (read.getReadNegativeStrandFlag()) {
+                        readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
+                    }
+                    final byte[] inconsistency = new byte[readBases.length];
+                    int iii;
+                    byte prevBase = colorSpace[0]; // The sentinel
+                    for (iii = 0; iii < readBases.length; iii++) {
+                        final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
+                        inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1);
+                        prevBase = readBases[iii];
+                    }
+                    read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
+
+                }
+                else {
+                    throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
+                            " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+                }
+            }
+        }
+    }
+
+    /**
+     * Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
+     * This method doesn't add the inconsistent tag to the read like parseColorSpace does
+     *
+     * @param read               The SAMRecord to parse
+     * @param originalQualScores The array of original quality scores to modify during the correction
+     * @param solidRecalMode     Which mode of solid recalibration to apply
+     * @param refBases           The reference for this read
+     * @return A new array of quality scores that have been ref bias corrected
+     */
+    public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
+
+        final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
+        if (attr != null) {
+            byte[] colorSpace;
+            if (attr instanceof String) {
+                colorSpace = ((String) attr).getBytes();
+            }
+            else {
+                throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
+            }
+
+            // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
+            byte[] readBases = read.getReadBases();
+            final byte[] colorImpliedBases = readBases.clone();
+            byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
+            if (read.getReadNegativeStrandFlag()) {
+                readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
+                refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
+            }
+            final int[] inconsistency = new int[readBases.length];
+            byte prevBase = colorSpace[0]; // The sentinel
+            for (int iii = 0; iii < readBases.length; iii++) {
+                final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
+                colorImpliedBases[iii] = thisBase;
+                inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
+                prevBase = readBases[iii];
+            }
+
+            // Now that we have the inconsistency array apply the desired correction to the inconsistent bases
+            if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
+                final boolean setBaseN = false;
+                originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
+            }
+            else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
+                final boolean setBaseN = true;
+                originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
+            }
+            else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
+                solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
+            }
+
+        }
+        else {
+            throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
+                    " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+        }
+
+        return originalQualScores;
+    }
+
+    public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
+        if (ReadUtils.isSOLiDRead(read)) {
+            final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
+            if (attr != null) {
+                byte[] colorSpace;
+                if (attr instanceof String) {
+                    colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
+                }
+                else {
+                    throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
+                }
+
+                for (byte color : colorSpace) {
+                    if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
+                        return true; // There is a bad color in this SOLiD read and the user wants to skip over it
+                    }
+                }
+
+            }
+            else {
+                throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
+                        " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
+            }
+        }
+
+        return false; // There aren't any color no calls in this SOLiD read
+    }
+
+    /**
+     * Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
+     *
+     * @param read               The SAMRecord to recalibrate
+     * @param readBases          The bases in the read which have been RC'd if necessary
+     * @param inconsistency      The array of 1/0 that says if this base is inconsistent with its color
+     * @param originalQualScores The array of original quality scores to set to zero if needed
+     * @param refBases           The reference which has been RC'd if necessary
+     * @param setBaseN           Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
+     * @return The byte array of original quality scores some of which might have been set to zero
+     */
+    private static byte[] solidRecalSetToQZero(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final byte[] refBases, final boolean setBaseN) {
+
+        final boolean negStrand = read.getReadNegativeStrandFlag();
+        for (int iii = 1; iii < originalQualScores.length; iii++) {
+            if (inconsistency[iii] == 1) {
+                if (readBases[iii] == refBases[iii]) {
+                    if (negStrand) {
+                        originalQualScores[originalQualScores.length - (iii + 1)] = (byte) 0;
+                    }
+                    else {
+                        originalQualScores[iii] = (byte) 0;
+                    }
+                    if (setBaseN) {
+                        readBases[iii] = (byte) 'N';
+                    }
+                }
+                // Set the prev base to Q0 as well
+                if (readBases[iii - 1] == refBases[iii - 1]) {
+                    if (negStrand) {
+                        originalQualScores[originalQualScores.length - iii] = (byte) 0;
+                    }
+                    else {
+                        originalQualScores[iii - 1] = (byte) 0;
+                    }
+                    if (setBaseN) {
+                        readBases[iii - 1] = (byte) 'N';
+                    }
+                }
+            }
+        }
+        if (negStrand) {
+            readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
+        }
+        read.setReadBases(readBases);
+
+        return originalQualScores;
+    }
+
+    /**
+     * Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
+     *
+     * @param read              The SAMRecord to recalibrate
+     * @param readBases         The bases in the read which have been RC'd if necessary
+     * @param inconsistency     The array of 1/0 that says if this base is inconsistent with its color
+     * @param colorImpliedBases The bases implied by the color space, RC'd if necessary
+     * @param refBases          The reference which has been RC'd if necessary
+     */
+    private static void solidRecalRemoveRefBias(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, final byte[] refBases) {
+
+        final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
+        if (attr != null) {
+            byte[] colorSpaceQuals;
+            if (attr instanceof String) {
+                String x = (String) attr;
+                colorSpaceQuals = x.getBytes();
+                SAMUtils.fastqToPhred(colorSpaceQuals);
+            }
+            else {
+                throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
+            }
+
+            for (int iii = 1; iii < inconsistency.length - 1; iii++) {
+                if (inconsistency[iii] == 1) {
+                    for (int jjj = iii - 1; jjj <= iii; jjj++) { // Correct this base and the one before it along the direction of the read
+                        if (jjj == iii || inconsistency[jjj] == 0) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
+                            if (readBases[jjj] == refBases[jjj]) {
+                                if (colorSpaceQuals[jjj] == colorSpaceQuals[jjj + 1]) { // Equal evidence for the color implied base and the reference base, so flip a coin
+                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(2);
+                                    if (rand == 0) { // The color implied base won the coin flip
+                                        readBases[jjj] = colorImpliedBases[jjj];
+                                    }
+                                }
+                                else {
+                                    final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
+                                    final int minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
+                                    int diffInQuality = maxQuality - minQuality;
+                                    int numLow = minQuality;
+                                    if (numLow == 0) {
+                                        numLow++;
+                                        diffInQuality++;
+                                    }
+                                    final int numHigh = Math.round(numLow * (float) Math.pow(10.0f, (float) diffInQuality / 10.0f)); // The color with higher quality is exponentially more likely
+                                    final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(numLow + numHigh);
+                                    if (rand >= numLow) { // higher q score won
+                                        if (maxQuality == (int) colorSpaceQuals[jjj]) {
+                                            readBases[jjj] = colorImpliedBases[jjj];
+                                        } // else ref color had higher q score, and won out, so nothing to do here
+                                    }
+                                    else { // lower q score won
+                                        if (minQuality == (int) colorSpaceQuals[jjj]) {
+                                            readBases[jjj] = colorImpliedBases[jjj];
+                                        } // else ref color had lower q score, and won out, so nothing to do here
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (read.getReadNegativeStrandFlag()) {
+                readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
+            }
+            read.setReadBases(readBases);
+        }
+        else { // No color space quality tag in file
+            throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
+        }
+    }
+
+    /**
+     * Given the base and the color calculate the next base in the sequence
+     *
+     * @param prevBase The base
+     * @param color    The color
+     * @return The next base in the sequence
+     */
+    private static byte getNextBaseFromColor(GATKSAMRecord read, final byte prevBase, final byte color) {
+        switch (color) {
+            case '0':
+                return prevBase;
+            case '1':
+                return performColorOne(prevBase);
+            case '2':
+                return performColorTwo(prevBase);
+            case '3':
+                return performColorThree(prevBase);
+            default:
+                throw new UserException.MalformedBAM(read, "Unrecognized color space in SOLID read, color = " + (char) color +
+                        " Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
+        }
+    }
+
+    /**
+     * Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality
+     *
+     * @param read   The read which contains the color space to check against
+     * @param offset The offset in the read at which to check
+     * @return Returns true if the base was inconsistent with the color space
+     */
+    public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
+        final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
+        if (attr != null) {
+            final byte[] inconsistency = (byte[]) attr;
+            // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
+            if (read.getReadNegativeStrandFlag()) { // Negative direction
+                return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
+            }
+            else { // Forward direction
+                return inconsistency[offset] != (byte) 0;
+            }
+
+            // This block of code is for if you want to check both the offset and the next base for color space inconsistency
+            //if( read.getReadNegativeStrandFlag() ) { // Negative direction
+            //    if( offset == 0 ) {
+            //        return inconsistency[0] != 0;
+            //    } else {
+            //        return (inconsistency[inconsistency.length - offset - 1] != 0) || (inconsistency[inconsistency.length - offset] != 0);
+            //    }
+            //} else { // Forward direction
+            //    if( offset == inconsistency.length - 1 ) {
+            //        return inconsistency[inconsistency.length - 1] != 0;
+            //    } else {
+            //        return (inconsistency[offset] != 0) || (inconsistency[offset + 1] != 0);
+            //    }
+            //}
+
+        }
+        else { // No inconsistency array, so nothing is inconsistent
+            return false;
+        }
+    }
+
+    /**
+     * Computes all requested covariates for every offset in the given read
+     * by calling covariate.getValues(..).
+     *
+     * @param read                The read for which to compute covariate values.
+     * @param requestedCovariates The list of requested covariates.
+     * @return An array of covariate values where result[i][j] is the covariate
+     *         value for the ith position in the read and the jth covariate in
+     *         reqeustedCovariates list.
+     */
+    public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
+        final int numRequestedCovariates = requestedCovariates.size();
+        final int readLength = read.getReadLength();
+        final CovariateKeySet covariateKeySet = new CovariateKeySet(readLength, numRequestedCovariates);
+
+        // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
+        for (Covariate covariate : requestedCovariates)
+            covariateKeySet.addCovariate(covariate.getValues(read));
+
+        read.setTemporaryAttribute(COVARS_ATTRIBUTE, covariateKeySet);
+    }
+
+    /**
+     * Perform a certain transversion (A <-> C or G <-> T) on the base.
+     *
+     * @param base the base [AaCcGgTt]
+     * @return the transversion of the base, or the input base if it's not one of the understood ones
+     */
+    private static byte performColorOne(byte base) {
+        switch (base) {
+            case 'A':
+            case 'a':
+                return 'C';
+            case 'C':
+            case 'c':
+                return 'A';
+            case 'G':
+            case 'g':
+                return 'T';
+            case 'T':
+            case 't':
+                return 'G';
+            default:
+                return base;
+        }
+    }
+
+    /**
+     * Perform a transition (A <-> G or C <-> T) on the base.
+     *
+     * @param base the base [AaCcGgTt]
+     * @return the transition of the base, or the input base if it's not one of the understood ones
+     */
+    private static byte performColorTwo(byte base) {
+        switch (base) {
+            case 'A':
+            case 'a':
+                return 'G';
+            case 'C':
+            case 'c':
+                return 'T';
+            case 'G':
+            case 'g':
+                return 'A';
+            case 'T':
+            case 't':
+                return 'C';
+            default:
+                return base;
+        }
+    }
+
+    /**
+     * Return the complement (A <-> T or C <-> G) of a base.
+     *
+     * @param base the base [AaCcGgTt]
+     * @return the complementary base, or the input base if it's not one of the understood ones
+     */
+    private static byte performColorThree(byte base) {
+        switch (base) {
+            case 'A':
+            case 'a':
+                return 'T';
+            case 'C':
+            case 'c':
+                return 'G';
+            case 'G':
+            case 'g':
+                return 'C';
+            case 'T':
+            case 't':
+                return 'A';
+            default:
+                return base;
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
new file mode 100755
index 000000000..91f865180
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
@@ -0,0 +1,112 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 3, 2009
+ *
+ * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
+ */
+
+public class RecalDatum extends RecalDatumOptimized {
+
+    private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
+    private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // constructors
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public RecalDatum() {
+        numObservations = 0L;
+        numMismatches = 0L;
+        estimatedQReported = 0.0;
+        empiricalQuality = 0.0;
+    }
+
+    public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
+        numObservations = _numObservations;
+        numMismatches = _numMismatches;
+        estimatedQReported = _estimatedQReported;
+        empiricalQuality = _empiricalQuality;
+    }
+
+    public RecalDatum(final RecalDatum copy) {
+        this.numObservations = copy.numObservations;
+        this.numMismatches = copy.numMismatches;
+        this.estimatedQReported = copy.estimatedQReported;
+        this.empiricalQuality = copy.empiricalQuality;
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // increment methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public final void combine(final RecalDatum other) {
+        final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
+        this.increment(other.numObservations, other.numMismatches);
+        this.estimatedQReported = -10 * Math.log10(sumErrors / (double) this.numObservations);
+        //if( this.estimatedQReported > QualityUtils.MAX_REASONABLE_Q_SCORE ) { this.estimatedQReported = QualityUtils.MAX_REASONABLE_Q_SCORE; }
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // methods to derive empirical quality score
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public final void calcCombinedEmpiricalQuality(final int smoothing, final int maxQual) {
+        this.empiricalQuality = empiricalQualDouble(smoothing, maxQual);    // cache the value so we don't call log over and over again
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // misc. methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public final double getEstimatedQReported() {
+        return estimatedQReported;
+    }
+
+    public final double getEmpiricalQuality() {
+        return empiricalQuality;
+    }
+
+    private double calcExpectedErrors() {
+        return (double) this.numObservations * qualToErrorProb(estimatedQReported);
+    }
+
+    private double qualToErrorProb(final double qual) {
+        return Math.pow(10.0, qual / -10.0);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
new file mode 100755
index 000000000..233380820
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatumOptimized.java
@@ -0,0 +1,115 @@
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.List;
+
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Jan 6, 2010
+ *
+ * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
+ * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
+ */
+
+public class RecalDatumOptimized {
+
+    protected long numObservations; // number of bases seen in total
+    protected long numMismatches; // number of bases seen that didn't match the reference
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // constructors
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public RecalDatumOptimized() {
+        numObservations = 0L;
+        numMismatches = 0L;
+    }
+
+    public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
+        numObservations = _numObservations;
+        numMismatches = _numMismatches;
+    }
+
+    public RecalDatumOptimized(final RecalDatumOptimized copy) {
+        this.numObservations = copy.numObservations;
+        this.numMismatches = copy.numMismatches;
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // increment methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public synchronized final void increment(final long incObservations, final long incMismatches) {
+        numObservations += incObservations;
+        numMismatches += incMismatches;
+    }
+
+    public synchronized final void increment(final RecalDatumOptimized other) {
+        increment(other.numObservations, other.numMismatches);
+    }
+
+    public synchronized final void increment(final List<RecalDatumOptimized> data) {
+        for (RecalDatumOptimized other : data) {
+            this.increment(other);
+        }
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // methods to derive empirical quality score
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public final double empiricalQualDouble(final int smoothing, final double maxQual) {
+        final double doubleMismatches = (double) (numMismatches + smoothing);
+        final double doubleObservations = (double) (numObservations + smoothing);
+        double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);        
+        return Math.min(empiricalQual, maxQual);
+    }
+
+    public final byte empiricalQualByte(final int smoothing) {
+        final double doubleMismatches = (double) (numMismatches + smoothing);
+        final double doubleObservations = (double) (numObservations + smoothing);
+        return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
+    }
+
+    public final byte empiricalQualByte() {
+        return empiricalQualByte(0);    // 'default' behavior is to use smoothing value of zero
+    } 
+
+    public final String outputToCSV() {
+        return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
new file mode 100755
index 000000000..38e7051e4
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.bqsr;
+
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Hidden;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 27, 2009
+ *
+ * A collection of the arguments that are common to both CovariateCounterWalker and TableRecalibrationWalker.
+ * This set of arguments will also be passed to the constructor of every Covariate when it is instantiated.
+ */
+
+public class RecalibrationArgumentCollection {
+
+    /**
+     * CountCovariates and TableRecalibration accept a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
+     * reads which have had the reference inserted because of color space inconsistencies.
+     */
+    @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
+    public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
+
+    /**
+     * CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
+     * no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in
+     * their color space tag can not be recalibrated.
+     */
+    @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
+    public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
+
+    /**
+     * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
+     */
+    @Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "size of the k-mer context to be used for base mismatches", required = false)
+    public int MISMATCHES_CONTEXT_SIZE = 2;
+
+    /**
+     * The context covariate will use a context of this size to calculate it's covariate value for base insertions
+     */
+    @Argument(fullName = "insertions_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions", required = false)
+    public int INSERTIONS_CONTEXT_SIZE = 8;
+
+    /**
+     * The context covariate will use a context of this size to calculate it's covariate value for base deletions
+     */
+    @Argument(fullName = "deletions_context_size", shortName = "dcs", doc = "size of the k-mer context to be used for base deletions", required = false)
+    public int DELETIONS_CONTEXT_SIZE = 8;
+
+    /**
+     * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
+     */
+    @Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false)
+    public byte MISMATCHES_DEFAULT_QUALITY = -1;
+
+    /**
+     * A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. (default is on)
+     */
+    @Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false)
+    public byte INSERTIONS_DEFAULT_QUALITY = 45;
+
+    /**
+     * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
+     */
+    @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
+    public byte DELETIONS_DEFAULT_QUALITY = 45;
+
+
+    @Hidden
+    @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
+    public String DEFAULT_PLATFORM = null;
+    @Hidden
+    @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
+    public String FORCE_PLATFORM = null;
+
+
+}

From f52f1f659f9c1d9a0eccc18830dd8e9c7e037cdc Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Fri, 10 Feb 2012 14:15:59 -0500
Subject: [PATCH 47/67] Multiallelic implementation of the TDT should be a
 pairwise list of values as per Mark Daly.  Integration tests change because
 the count in the header is now A instead of 1.

---
 .../TransmissionDisequilibriumTest.java       | 45 ++++++++-----------
 .../VariantAnnotatorIntegrationTest.java      |  2 +-
 2 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
index d84ba44bc..1f8ccf652 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
@@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -58,41 +59,33 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
     // return the descriptions used for the VCF INFO meta field
     public List<String> getKeyNames() { return Arrays.asList("TDT"); }
 
-    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
+    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
 
     // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
-    private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
+    private List<Double> calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
 
-        double nABGivenABandBB = 0.0;
-        double nBBGivenABandBB = 0.0;
-        double nAAGivenABandAB = 0.0;
-        double nBBGivenABandAB = 0.0;
-        double nAAGivenAAandAB = 0.0;
-        double nABGivenAAandAB = 0.0;
+        List<Double> pairwiseTDTs = new ArrayList<Double>(10);
+        final int HomRefIndex = 0;
 
         // for each pair of alleles, add the likelihoods
-        int numAlleles = vc.getNAlleles();
-        for ( int allele1 = 0; allele1 < numAlleles; allele1++ ) {
-            final int HOM1index = determineHomIndex(allele1, numAlleles);
+        int numAltAlleles = vc.getAlternateAlleles().size();
+        for ( int alt = 1; alt <= numAltAlleles; alt++ ) {
+            final int HetIndex = alt;
+            final int HomVarIndex = determineHomIndex(alt, numAltAlleles+1);
 
-            for ( int allele2 = allele1 + 1; allele2 < numAlleles; allele2++ ) {
+            final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HetIndex, HomVarIndex, HetIndex);
+            final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HomVarIndex) + calculateNChildren(vc, triosToTest, HomVarIndex, HomVarIndex, HetIndex);
+            final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HetIndex);
+            final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HomVarIndex, HetIndex, HetIndex);
+            final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, HomRefIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HomRefIndex, HetIndex, HomRefIndex);
+            final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HetIndex, HomRefIndex, HetIndex) + calculateNChildren(vc, triosToTest, HetIndex, HetIndex, HomRefIndex);
 
-                // TODO -- cache these for better performance
-                final int HETindex = HOM1index + (allele2 - allele1);
-                final int HOM2index = determineHomIndex(allele2, numAlleles);
-
-                nABGivenABandBB += calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HETindex, HOM2index, HETindex);
-                nBBGivenABandBB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HOM2index) + calculateNChildren(vc, triosToTest, HOM2index, HOM2index, HETindex);
-                nAAGivenABandAB += calculateNChildren(vc, triosToTest, HOM1index, HETindex, HETindex);
-                nBBGivenABandAB += calculateNChildren(vc, triosToTest, HOM2index, HETindex, HETindex);
-                nAAGivenAAandAB += calculateNChildren(vc, triosToTest, HOM1index, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HOM1index, HETindex, HOM1index);
-                nABGivenAAandAB += calculateNChildren(vc, triosToTest, HETindex, HOM1index, HETindex) + calculateNChildren(vc, triosToTest, HETindex, HETindex, HOM1index);
-            }
+            final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
+            final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
+            pairwiseTDTs.add((numer * numer) / denom);
         }
 
-        final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
-        final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
-        return (numer * numer) / denom;
+        return pairwiseTDTs;
     }
 
     private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index 0d9d9bcd8..7984a00c0 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
 
     @Test
     public void testTDTAnnotation() {
-        final String MD5 = "0aedd760e8099f0b95d53a41bdcd793e";
+        final String MD5 = "a78c1e950740d3c13c0258960c5fa8e1";
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
                         " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,

From 9b8fd4c2ff6c63bc60fbab973590ca495cf99cfd Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sat, 11 Feb 2012 10:57:20 -0500
Subject: [PATCH 48/67] Updating the half of the code that makes use of the
 recalibration information to work with the new refactoring of the bqsr.
 Reverting the covariate interface change in the original bqsr because the
 error model enum was moved to a different class and didn't make sense any
 more.

---
 .../traversals/TraverseActiveRegions.java     |   6 +-
 .../gatk/walkers/ActiveRegionWalker.java      |   4 +-
 .../walkers/annotator/MVLikelihoodRatio.java  |   2 +-
 .../gatk/walkers/bqsr/ContextCovariate.java   |   5 +
 .../sting/gatk/walkers/bqsr/Covariate.java    |   1 +
 .../gatk/walkers/bqsr/CovariateKeySet.java    |  12 +-
 .../gatk/walkers/bqsr/CycleCovariate.java     |   5 +
 .../walkers/bqsr/QualityScoreCovariate.java   |  36 +++---
 .../gatk/walkers/bqsr/ReadGroupCovariate.java |   6 +
 .../gatk/walkers/bqsr/RecalDataManager.java   |  69 ++++++-----
 .../recalibration/ContextCovariate.java       |   2 +-
 .../recalibration/CountCovariatesWalker.java  |   2 +-
 .../gatk/walkers/recalibration/Covariate.java |   2 +-
 .../walkers/recalibration/CycleCovariate.java |   2 +-
 .../walkers/recalibration/DinucCovariate.java |   2 +-
 .../recalibration/GCContentCovariate.java     |   2 +-
 .../recalibration/HomopolymerCovariate.java   |   2 +-
 .../MappingQualityCovariate.java              |   2 +-
 .../recalibration/MinimumNQSCovariate.java    |   2 +-
 .../recalibration/PositionCovariate.java      |   2 +-
 .../recalibration/PrimerRoundCovariate.java   |   2 +-
 .../recalibration/QualityScoreCovariate.java  |  14 +--
 .../recalibration/ReadGroupCovariate.java     |   2 +-
 .../recalibration/RecalDataManager.java       |   5 +-
 .../TableRecalibrationWalker.java             |   2 +-
 .../recalibration/BaseRecalibration.java      | 113 +++++++++---------
 .../sting/utils/sam/GATKSAMRecord.java        |  40 +------
 27 files changed, 165 insertions(+), 179 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 58c2df877..70fe43755 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -107,7 +107,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 }
 
                 // If this is the last pileup for this shard calculate the minimum alignment start so that we know 
-                //   which active regions in the work queue are now safe to process
+                // which active regions in the work queue are now safe to process
                 if( !locusView.hasNext() ) {
                     for( final PileupElement p : locus.getBasePileup() ) {
                         final GATKSAMRecord read = p.getRead();
@@ -135,7 +135,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 }
             }
 
-            // Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
+            // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
             while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
                 final ActiveRegion activeRegion = workQueue.remove();
                 sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
@@ -190,7 +190,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
         reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
 
         logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
-        final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
+        final M x = walker.map( activeRegion, null );
         return walker.reduce( x, sum );
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
index 244870c78..6403f15a2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -5,14 +5,12 @@ import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.IntervalBinding;
 import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
 import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
 import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
 import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
-import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@@ -77,7 +75,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
     public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
 
     // Map over the ActiveRegion
-    public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
+    public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
 
     public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
         final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
index 889cc634c..e38d7d142 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
@@ -22,8 +22,8 @@ import java.util.*;
  * User: chartl
  * Date: 9/14/11
  * Time: 12:24 PM
- * To change this template use File | Settings | File Templates.
  */
+
 public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
 
     private MendelianViolation mendelianViolation = null;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
index a46543f67..c7b90606c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -98,4 +98,9 @@ public class ContextCovariate implements StandardCovariate {
         return s;
     }
 
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Comparable getValue(final String str) {
+        return str;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
index d1726dd13..b99cd3c3c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java
@@ -53,6 +53,7 @@ public interface Covariate {
      */
     public CovariateValues getValues(GATKSAMRecord read);
 
+    public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
 }
 
 interface RequiredCovariate extends Covariate {}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
index 04a0684b6..f71bb03e5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
@@ -15,18 +15,18 @@ public class CovariateKeySet {
 
     private int nextCovariateIndex;
     
-    private final String mismatchesCovariateName = "M";
-    private final String insertionsCovariateName = "I";
-    private final String  deletionsCovariateName = "D";
+    public final static String mismatchesCovariateName = "M";
+    public final static String insertionsCovariateName = "I";
+    public final static String  deletionsCovariateName = "D";
 
     public CovariateKeySet(int readLength, int numberOfCovariates) {
         numberOfCovariates++;                                               // +1 because we are adding the mismatch covariate (to comply with the molten table format)
         this.mismatchesKeySet = new Object[readLength][numberOfCovariates]; 
         this.insertionsKeySet = new Object[readLength][numberOfCovariates];
         this.deletionsKeySet  = new Object[readLength][numberOfCovariates];
-        initializeCovariateKeySet(this.mismatchesKeySet, this.mismatchesCovariateName);
-        initializeCovariateKeySet(this.insertionsKeySet, this.insertionsCovariateName);
-        initializeCovariateKeySet(this.deletionsKeySet,  this.deletionsCovariateName);
+        initializeCovariateKeySet(this.mismatchesKeySet, mismatchesCovariateName);
+        initializeCovariateKeySet(this.insertionsKeySet, insertionsCovariateName);
+        initializeCovariateKeySet(this.deletionsKeySet,  deletionsCovariateName);
         this.nextCovariateIndex = 0;
     }
     
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
index f996de50e..a5795c018 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java
@@ -196,4 +196,9 @@ public class CycleCovariate implements StandardCovariate {
         return new CovariateValues(cycles, cycles, cycles);
     }
 
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
+    }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
index 0d36f3ff4..b48e486ac 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java
@@ -39,39 +39,35 @@ import java.util.Arrays;
 
 public class QualityScoreCovariate implements RequiredCovariate {
 
-    private byte defaultMismatchesQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
-    private byte defaultInsertionsQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
-    private byte  defaultDeletionsQuality;                                      // walker parameter. Must be > 0 to be used, otherwise we use the quality from the read.
-        
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
     public void initialize(final RecalibrationArgumentCollection RAC) {
-        defaultMismatchesQuality = RAC.MISMATCHES_DEFAULT_QUALITY;
-        defaultInsertionsQuality = RAC.INSERTIONS_DEFAULT_QUALITY;
-         defaultDeletionsQuality = RAC.DELETIONS_DEFAULT_QUALITY; 
     }
 
     @Override
     public CovariateValues getValues(final GATKSAMRecord read) {
         int readLength = read.getReadLength();
-        
-        Byte [] mismatches = new Byte[readLength];
-        Byte [] insertions = new Byte[readLength];
-        Byte []  deletions = new Byte[readLength];
-        
+
+        Integer [] mismatches = new Integer[readLength];
+        Integer [] insertions = new Integer[readLength];
+        Integer []  deletions = new Integer[readLength];
+
         byte [] baseQualities = read.getBaseQualities();
+        byte [] baseInsertionQualities = read.getBaseInsertionQualities();
+        byte [] baseDeletionQualities = read.getBaseDeletionQualities();
 
-        if (defaultMismatchesQuality >= 0)
-            Arrays.fill(mismatches, defaultMismatchesQuality);                  // if the user decides to override the base qualities in the read, use the flat value
-        else {
-            for (int i=0; i<baseQualities.length; i++)
-                mismatches[i] = baseQualities[i];
+        for (int i=0; i<baseQualities.length; i++) {
+            mismatches[i] = (int) baseQualities[i];
+            insertions[i] = (int) baseInsertionQualities[i];
+            deletions[i] = (int) baseDeletionQualities[i];
         }
 
-        Arrays.fill(insertions, defaultInsertionsQuality);                      // Some day in the future when base insertion and base deletion quals exist the samtools API will
-        Arrays.fill( deletions, defaultDeletionsQuality);                       // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat value (parameter)
-
         return new CovariateValues(mismatches, insertions, deletions);
     }
 
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Comparable getValue(final String str) {
+        return Integer.parseInt(str);
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
index c7a5700e8..74aead9b0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java
@@ -52,6 +52,12 @@ public class ReadGroupCovariate implements RequiredCovariate {
         Arrays.fill(readGroups, readGroupId);
         return new CovariateValues(readGroups, readGroups, readGroups);
     }
+
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Comparable getValue(final String str) {
+        return str;
+    }
 }
 
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
index 6e6227981..a143ff98d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -52,20 +53,23 @@ import java.util.Map;
  */
 
 public class RecalDataManager {
-    public final NestedHashMap nestedHashMap;                           // The full dataset
-    private final NestedHashMap dataCollapsedReadGroup;                 // Table where everything except read group has been collapsed
-    private final NestedHashMap dataCollapsedQualityScore;              // Table where everything except read group and quality score has been collapsed
-    private final ArrayList<NestedHashMap> dataCollapsedByCovariate;    // Tables where everything except read group, quality score, and given covariate has been collapsed
+    public final NestedHashMap nestedHashMap;                                                           // The full dataset
+    private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedReadGroup;                 // Table where everything except read group has been collapsed
+    private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore;              // Table where everything except read group and quality score has been collapsed
+    private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate;    // Tables where everything except read group, quality score, and given covariate has been collapsed
 
-    public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";      // The tag that holds the original quality scores
     public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ";   // The tag that holds the color space quality scores for SOLID bams
     public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS";        // The tag that holds the color space for SOLID bams
     public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC";    // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
-    private static boolean warnUserNullReadGroup = false;
     private static boolean warnUserNullPlatform = false;
 
     private static final String COVARS_ATTRIBUTE = "COVARS";                   // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
 
+    public enum BaseRecalibrationType {
+        BASE_SUBSTITUTION,
+        BASE_INSERTION,
+        BASE_DELETION
+    }
 
     public enum SOLID_RECAL_MODE {
         /**
@@ -109,13 +113,18 @@ public class RecalDataManager {
     }
 
     public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
-        if (createCollapsedTables) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
+        if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration
             nestedHashMap = null;
-            dataCollapsedReadGroup = new NestedHashMap();
-            dataCollapsedQualityScore = new NestedHashMap();
-            dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
-            for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
-                dataCollapsedByCovariate.add(new NestedHashMap());
+            dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
+            dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
+            dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
+            for ( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
+                dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
+                dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
+                dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
+                for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
+                    dataCollapsedByCovariate.get(errorModel).add(new NestedHashMap());
+                }
             }
         }
         else {
@@ -137,7 +146,7 @@ public class RecalDataManager {
      * @param fullDatum                  The RecalDatum which is the data for this mapping
      * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
      */
-    public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN) {
+    public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel ) {
 
         // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
         //data.put(key, thisDatum); // add the mapping to the main table
@@ -151,9 +160,9 @@ public class RecalDataManager {
         // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
         if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
             readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
-            collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
+            collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(errorModel).get(readGroupCollapsedKey);
             if (collapsedDatum == null) {
-                dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
+                dataCollapsedReadGroup.get(errorModel).put(new RecalDatum(fullDatum), readGroupCollapsedKey);
             }
             else {
                 collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
@@ -163,9 +172,9 @@ public class RecalDataManager {
         // Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
         qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
         qualityScoreCollapsedKey[1] = key[1]; //                                    and quality score
-        collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
+        collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(errorModel).get(qualityScoreCollapsedKey);
         if (collapsedDatum == null) {
-            dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
+            dataCollapsedQualityScore.get(errorModel).put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
         }
         else {
             collapsedDatum.increment(fullDatum);
@@ -178,9 +187,9 @@ public class RecalDataManager {
             final Object theCovariateElement = key[iii + 2]; //                                        and the given covariate
             if (theCovariateElement != null) {
                 covariateCollapsedKey[2] = theCovariateElement;
-                collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
+                collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(errorModel).get(iii).get(covariateCollapsedKey);
                 if (collapsedDatum == null) {
-                    dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
+                    dataCollapsedByCovariate.get(errorModel).get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
                 }
                 else {
                     collapsedDatum.increment(fullDatum);
@@ -198,11 +207,13 @@ public class RecalDataManager {
      */
     public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
 
-        recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing, maxQual);
-        recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing, maxQual);
-        for (NestedHashMap map : dataCollapsedByCovariate) {
-            recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
-            checkForSingletons(map.data);
+        for( final BaseRecalibrationType errorModel : BaseRecalibrationType.values() ) {
+            recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
+            recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
+            for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
+                recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
+                checkForSingletons(map.data);
+            }
         }
     }
 
@@ -241,15 +252,15 @@ public class RecalDataManager {
      * @param covariate Which covariate indexes the desired collapsed HashMap
      * @return The desired collapsed HashMap
      */
-    public final NestedHashMap getCollapsedTable(final int covariate) {
+    public final NestedHashMap getCollapsedTable(final int covariate, final BaseRecalibrationType errorModel) {
         if (covariate == 0) {
-            return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
+            return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed
         }
         else if (covariate == 1) {
-            return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
+            return dataCollapsedQualityScore.get(errorModel); // Table where everything except read group and quality score has been collapsed
         }
         else {
-            return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
+            return dataCollapsedByCovariate.get(errorModel).get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
         }
     }
 
@@ -260,7 +271,7 @@ public class RecalDataManager {
      * @param RAC  The list of shared command line arguments
      */
     public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
-        GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
+        GATKSAMReadGroupRecord readGroup = read.getReadGroup();
 
         if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) {
             readGroup.setPlatform(RAC.FORCE_PLATFORM);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
index 875782fdc..e1a7772db 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java
@@ -56,7 +56,7 @@ public class ContextCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         byte[] bases = read.getReadBases();
         for (int i = 0; i < read.getReadLength(); i++)
             comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index 626460be6..a99f35f45 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -378,7 +378,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
                     }
 
                     RecalDataManager.parseColorSpace(gatkRead);
-                    gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION));
+                    gatkRead.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(gatkRead, requestedCovariates));
                 }
 
                 // Skip this position if base quality is zero
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
index e4edb8ca6..9d5747023 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java
@@ -43,7 +43,7 @@ public interface Covariate {
 
     public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
 
-    public void getValues(GATKSAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType);
+    public void getValues(GATKSAMRecord read, Comparable[] comparable);
     //Takes an array of size (at least) read.getReadLength() and fills it with covariate
     //values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
     //read-specific calculations to be done just once rather than for each offset.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
index 4244af7d1..b8d13ca10 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java
@@ -66,7 +66,7 @@ public class CycleCovariate implements StandardCovariate {
 
     // Used to pick out the covariate's value from attributes of the read
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
 
         //-----------------------------
         // Illumina, Solid, PacBio, and Complete Genomics
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
index 2fa1b33ca..9a401d09f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java
@@ -66,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
      * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
      */
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
         final int readLength = read.getReadLength();
         final boolean negativeStrand = read.getReadNegativeStrandFlag();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
index 7b209ae5c..14ffd35a4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java
@@ -82,7 +82,7 @@ public class GCContentCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
index fd67edc3b..004fb0bdb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
@@ -95,7 +95,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
index e22049890..54fa18106 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java
@@ -55,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
index 1dfb915b9..ecaa55006 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java
@@ -65,7 +65,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
index fbd1efc47..fd720697f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java
@@ -55,7 +55,7 @@ public class PositionCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
index 8dfa11884..d6bdea5bf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java
@@ -62,7 +62,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         for (int iii = 0; iii < read.getReadLength(); iii++) {
             comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
index 1ed4a6fe8..a29a0530c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java
@@ -46,16 +46,10 @@ public class QualityScoreCovariate implements RequiredCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
-        if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
-            byte[] baseQualities = read.getBaseQualities();
-            for (int i = 0; i < read.getReadLength(); i++) {
-                comparable[i] = (int) baseQualities[i];
-            }
-        }
-        else { // model == BASE_INSERTION || model == BASE_DELETION
-            Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
-            // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
+        byte[] baseQualities = read.getBaseQualities();
+        for (int i = 0; i < read.getReadLength(); i++) {
+            comparable[i] = (int) baseQualities[i];
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
index 27e1d8263..33adf4417 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java
@@ -44,7 +44,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
     }
 
     @Override
-    public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public void getValues(final GATKSAMRecord read, final Comparable[] comparable) {
         final String readGroupId = read.getReadGroup().getReadGroupId();
         for (int i = 0; i < read.getReadLength(); i++) {
             comparable[i] = readGroupId;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 311e33f8a..1a6b8cfcb 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -63,7 +63,6 @@ public class RecalDataManager {
     public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
     public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
     public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
-    private static boolean warnUserNullReadGroup = false;
     private static boolean warnUserNullPlatform = false;
 
     public enum SOLID_RECAL_MODE {
@@ -604,7 +603,7 @@ public class RecalDataManager {
      *         value for the ith position in the read and the jth covariate in
      *         reqeustedCovariates list.
      */
-    public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
+    public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
         //compute all covariates for this read
         final int numRequestedCovariates = requestedCovariates.size();
         final int readLength = gatkRead.getReadLength();
@@ -613,7 +612,7 @@ public class RecalDataManager {
         final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
 
         for (int i = 0; i < numRequestedCovariates; i++) {                              // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
-            requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
+            requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder);
             for (int j = 0; j < readLength; j++)
                 covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];    // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
index cd848cd9e..08151321f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java
@@ -405,7 +405,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
         }
 
         //compute all covariate values for this read
-        final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
+        final Comparable[][] covariateValues_offset_x_covar = RecalDataManager.computeCovariates(read, requestedCovariates);
 
         // For each base in the read
         for (int offset = 0; offset < read.getReadLength(); offset++) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index 75d4b1e17..2c1bc494a 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -25,14 +25,12 @@
 
 package org.broadinstitute.sting.utils.recalibration;
 
-import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
-import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager;
-import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
-import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.bqsr.*;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
 import org.broadinstitute.sting.utils.collections.NestedHashMap;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.text.XReadLines;
@@ -52,19 +50,13 @@ import java.util.regex.Pattern;
 
 public class BaseRecalibration {
 
-    public enum BaseRecalibrationType {
-        BASE_SUBSTITUTION,
-        BASE_INSERTION,
-        BASE_DELETION
-    }
-
     private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
     private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
     public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
     public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
     public static final String EOF_MARKER = "EOF";
     private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
-    private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
+    private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
 
     public BaseRecalibration( final File RECAL_FILE ) {
         // Get a list of all available covariates
@@ -89,7 +81,7 @@ public class BaseRecalibration {
                         throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
                     } else { // Found the covariate list in input file, loop through all of them and instantiate them
                         String[] vals = line.split(",");
-                        for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
+                        for( int iii = 0; iii < vals.length - 4; iii++ ) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
                             boolean foundClass = false;
                             for( Class<?> covClass : classes ) {
                                 if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
@@ -160,7 +152,7 @@ public class BaseRecalibration {
         final String[] vals = line.split(",");
 
         // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
-        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
+        if( vals.length != requestedCovariates.size() + 4 ) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
             throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
                     " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
         }
@@ -172,39 +164,63 @@ public class BaseRecalibration {
             cov = requestedCovariates.get( iii );
             key[iii] = cov.getValue( vals[iii] );
         }
-
+        final String modelString = vals[iii++];
+        final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION :
+            ( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION :
+            ( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) );
+                
         // Create a new datum using the number of observations, number of mismatches, and reported quality score
         final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
         // Add that datum to all the collapsed tables which will be used in the sequential calculation
-        dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
+        
+        dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
     }
     
-    public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) {
+    public void recalibrateRead( final GATKSAMRecord read ) {
 
-        final byte[] recalQuals = originalQuals.clone();
-        
         //compute all covariate values for this read
-        final Comparable[][] covariateValues_offset_x_covar =
-                RecalDataManager.computeCovariates(read, requestedCovariates, modelType);
-    
-        // For each base in the read
-        for( int offset = 0; offset < read.getReadLength(); offset++ ) {
-    
-            final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
-    
-            Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
-            if(qualityScore == null)
-            {
-                qualityScore = performSequentialQualityCalculation( fullCovariateKey );
-                qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
-            }
-    
-            recalQuals[offset] = qualityScore;
-        }
-    
-        preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
+        RecalDataManager.computeCovariates(read, requestedCovariates);
+        final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
+
+        for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
+            final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() :
+                ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() :
+                ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) );
+            final byte[] recalQuals = originalQuals.clone();
+
+            // For each base in the read
+            for( int offset = 0; offset < read.getReadLength(); offset++ ) {
         
-        return recalQuals;
+                final Object[] fullCovariateKey =
+                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) :
+                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) :
+                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) );
+        
+                Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
+                if( qualityScore == null ) {
+                    qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
+                    qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
+                }
+        
+                recalQuals[offset] = qualityScore;
+            }
+        
+            preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
+            switch (errorModel) {
+                case BASE_SUBSTITUTION:
+                    read.setBaseQualities( recalQuals );
+                    break;
+                case BASE_INSERTION:
+                    read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals );
+                    break;
+                case BASE_DELETION:
+                    read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals );
+                    break;
+                default:
+                    throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
+            }
+        }
+       
     }
 
     /**
@@ -222,7 +238,7 @@ public class BaseRecalibration {
      * @param key The list of Comparables that were calculated from the covariates
      * @return A recalibrated quality score as a byte
      */
-    private byte performSequentialQualityCalculation( final Object... key ) {
+    private byte performSequentialQualityCalculation( final RecalDataManager.BaseRecalibrationType errorModel, final Object... key ) {
 
         final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
         final Object[] readGroupCollapsedKey = new Object[1];
@@ -231,7 +247,7 @@ public class BaseRecalibration {
 
         // The global quality shift (over the read group only)
         readGroupCollapsedKey[0] = key[0];
-        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
+        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0, errorModel).get( readGroupCollapsedKey ));
         double globalDeltaQ = 0.0;
         if( globalRecalDatum != null ) {
             final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
@@ -242,7 +258,7 @@ public class BaseRecalibration {
         // The shift in quality between reported and empirical
         qualityScoreCollapsedKey[0] = key[0];
         qualityScoreCollapsedKey[1] = key[1];
-        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
+        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1, errorModel).get( qualityScoreCollapsedKey ));
         double deltaQReported = 0.0;
         if( qReportedRecalDatum != null ) {
             final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
@@ -256,7 +272,7 @@ public class BaseRecalibration {
         covariateCollapsedKey[1] = key[1];
         for( int iii = 2; iii < key.length; iii++ ) {
             covariateCollapsedKey[2] =  key[iii]; // The given covariate
-            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
+            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii, errorModel).get( covariateCollapsedKey ));
             if( covariateRecalDatum != null ) {
                 deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
                 deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
@@ -265,18 +281,6 @@ public class BaseRecalibration {
 
         final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
         return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
-
-        // Verbose printouts used to validate with old recalibrator
-        //if(key.contains(null)) {
-        //    System.out.println( key  + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
-        //                 qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
-        //}
-        //else {
-        //    System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
-        //                 key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
-        //}
-
-        //return newQualityByte;
     }
 
     /**
@@ -291,5 +295,4 @@ public class BaseRecalibration {
             }
         }
     }
-
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index bdcf2b210..f6b3d759c 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -54,7 +54,6 @@ public class GATKSAMRecord extends BAMRecord {
     // Base Quality Score Recalibrator specific attribute tags
     public static final String BQSR_BASE_INSERTION_QUALITIES = "BI";
     public static final String BQSR_BASE_DELETION_QUALITIES = "BD";
-    public static final String BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG = "BR";
 
     // the SAMRecord data we're caching
     private String mReadString = null;
@@ -163,27 +162,6 @@ public class GATKSAMRecord extends BAMRecord {
         return super.equals(o);
     }
 
-
-    @Override
-    public byte[] getBaseQualities() {
-        return super.getBaseQualities();
-        /*
-        if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
-            return super.getBaseQualities();
-        } else {
-            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
-            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
-                final byte[] quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, super.getBaseQualities() );
-                setBaseQualities(quals);
-                setAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG, true );
-                return quals;
-            } else { // just use the qualities that are in the read since we don't have the sufficient information to recalibrate on the fly
-                return super.getBaseQualities();
-            }
-        }
-        */
-    }
-
     /**
      * Accessors for base insertion and base deletion quality scores
      */
@@ -191,13 +169,8 @@ public class GATKSAMRecord extends BAMRecord {
         byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
         if( quals == null ) {
             quals = new byte[getBaseQualities().length];
-            Arrays.fill(quals, (byte) 45); // allow for differing default values between BaseInsertions and BaseDeletions
-            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
-            // else give default values which are flat Q45
-            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
-                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
-            }
-            // add the qual array to the read so that we don't have to do the recalibration work again
+            Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
+            // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
             setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
         }
         return quals;
@@ -207,13 +180,8 @@ public class GATKSAMRecord extends BAMRecord {
         byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
         if( quals == null ) {
             quals = new byte[getBaseQualities().length];
-            Arrays.fill(quals, (byte) 45);
-            // if the recal data was populated in the engine then recalibrate the quality scores on the fly
-            // else give default values which are flat Q45
-            if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
-                quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
-            }
-            // add the qual array to the read so that we don't have to do the recalibration work again
+            Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
+            // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
             setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
         }
         return quals;

From 3caa1b83bb220d1de1f7d5d3bc3b04b86f09c519 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sat, 11 Feb 2012 11:48:32 -0500
Subject: [PATCH 49/67] Updating HC integration tests

---
 .../sting/gatk/walkers/bqsr/RecalDataManager.java               | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
index a143ff98d..8a255391f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@@ -181,7 +181,7 @@ public class RecalDataManager {
         }
 
         // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
-        for (int iii = 0; iii < dataCollapsedByCovariate.size(); iii++) {
+        for (int iii = 0; iii < dataCollapsedByCovariate.get(errorModel).size(); iii++) {
             covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
             covariateCollapsedKey[1] = key[1]; //                                    and quality score ...
             final Object theCovariateElement = key[iii + 2]; //                                        and the given covariate

From 41ffd08d534cc56701b0257d01922dcef886a199 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 13 Feb 2012 12:35:09 -0500
Subject: [PATCH 51/67] On the fly base quality score recalibration now happens
 up front in a SAMIterator on input instead of in a lazy-loading fashion if
 the BQSR table is provided as an engine argument. On the fly recalibration is
 now completely hooked up and live.

---
 .../sting/gatk/GenomeAnalysisEngine.java      | 11 ++--
 .../sting/gatk/ReadProperties.java            | 38 +++++++-------
 .../gatk/datasources/reads/SAMDataSource.java | 14 +++++-
 .../traversals/TraverseActiveRegions.java     |  4 +-
 .../gatk/walkers/bqsr/ContextCovariate.java   |  2 +-
 .../gatk/walkers/bqsr/CovariateKeySet.java    | 33 ++++++++++--
 .../sting/utils/baq/BAQSamIterator.java       |  2 +-
 .../sting/utils/fragments/FragmentUtils.java  | 19 ++++++-
 .../utils/recalibration/BQSRSamIterator.java  | 50 +++++++++++++++++++
 .../recalibration/BaseRecalibration.java      | 35 ++++---------
 .../sting/utils/sam/GATKSAMRecord.java        | 35 +++++++++++--
 .../reads/DownsamplerBenchmark.java           |  3 +-
 .../LocusIteratorByStateUnitTest.java         |  1 +
 13 files changed, 183 insertions(+), 64 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index c0db75aa9..50ef4653b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -185,12 +185,12 @@ public class GenomeAnalysisEngine {
     public static void resetRandomGenerator(long seed) { randomGenerator.setSeed(seed); }
 
     /**
-     *  Static base quality score recalibration helper object
+     *  Base Quality Score Recalibration helper object
      */
-    private static BaseRecalibration baseRecalibration = null;
-    public static BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
-    public static boolean hasBaseRecalibration() { return baseRecalibration != null; }
-    public static void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); }
+    private BaseRecalibration baseRecalibration = null;
+    public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
+    public boolean hasBaseRecalibration() { return baseRecalibration != null; }
+    public void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); }
 
     /**
      * Actually run the GATK with the specified walker.
@@ -770,6 +770,7 @@ public class GenomeAnalysisEngine {
                 getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
                 getWalkerBAQQualityMode(),
                 refReader,
+                getBaseRecalibration(),
                 argCollection.defaultBaseQualities);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java
index daa8ff60d..db22886ce 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java
@@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.utils.baq.BAQ;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 
 import java.util.Collection;
 /**
@@ -27,23 +28,20 @@ import java.util.Collection;
  * information about how they should be downsampled, sorted, and filtered.
  */
 public class ReadProperties {
-    private Collection<SAMReaderID> readers = null;
-    private SAMFileHeader header = null;
-    private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
-    private DownsamplingMethod downsamplingMethod = null;
-    private ValidationExclusion exclusionList = null;
-    private Collection<ReadFilter> supplementalFilters = null;
-    private boolean includeReadsWithDeletionAtLoci = false;
-    private boolean useOriginalBaseQualities = false;
-    private boolean generateExtendedEvents = false;
-    private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF;
-    private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY;
-    IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired
-    private byte defaultBaseQualities;
-
-    // do we want to generate additional piles of "extended" events (indels)
-// immediately after the reference base such event is associated with?
-
+    private final Collection<SAMReaderID> readers;
+    private final SAMFileHeader header;
+    private final SAMFileReader.ValidationStringency validationStringency;
+    private final DownsamplingMethod downsamplingMethod;
+    private final ValidationExclusion exclusionList;
+    private final Collection<ReadFilter> supplementalFilters;
+    private final boolean includeReadsWithDeletionAtLoci;
+    private final boolean useOriginalBaseQualities;
+    private final boolean generateExtendedEvents;
+    private final BAQ.CalculationMode cmode;
+    private final BAQ.QualityMode qmode;
+    private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired
+    private final BaseRecalibration bqsrApplier;
+    private final byte defaultBaseQualities;
 
     /**
      * Return true if the walker wants to see reads that contain deletions when looking at locus pileups
@@ -126,6 +124,8 @@ public class ReadProperties {
         return refReader;
     }
 
+    public BaseRecalibration getBQSRApplier() { return bqsrApplier; }
+
     /**
      * @return Default base quality value to fill reads missing base quality information.
      */
@@ -165,8 +165,9 @@ public class ReadProperties {
            boolean includeReadsWithDeletionAtLoci,
            boolean generateExtendedEvents,
            BAQ.CalculationMode cmode,
-           BAQ.QualityMode qmode,
+           BAQ.QualityMode qmode,           
            IndexedFastaSequenceFile refReader,
+           BaseRecalibration bqsrApplier,
            byte defaultBaseQualities) {
         this.readers = samFiles;
         this.header = header;
@@ -180,6 +181,7 @@ public class ReadProperties {
         this.cmode = cmode;
         this.qmode = qmode;
         this.refReader = refReader;
+        this.bqsrApplier = bqsrApplier;
         this.defaultBaseQualities = defaultBaseQualities;
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 27b9e7f77..70284b2a6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -46,6 +46,8 @@ import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.baq.BAQSamIterator;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator;
+import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
 
 import java.io.File;
@@ -201,6 +203,7 @@ public class SAMDataSource {
                 BAQ.CalculationMode.OFF,
                 BAQ.QualityMode.DONT_MODIFY,
                 null, // no BAQ
+                null, // no BQSR
                 (byte) -1);
     }
 
@@ -237,6 +240,7 @@ public class SAMDataSource {
             BAQ.CalculationMode cmode,
             BAQ.QualityMode qmode,
             IndexedFastaSequenceFile refReader,
+            BaseRecalibration bqsrApplier,
             byte defaultBaseQualities) {
         this.readMetrics = new ReadMetrics();
         this.genomeLocParser = genomeLocParser;
@@ -309,6 +313,7 @@ public class SAMDataSource {
                 cmode,
                 qmode,
                 refReader,
+                bqsrApplier,
                 defaultBaseQualities);
 
         // cache the read group id (original) -> read group id (merged)
@@ -591,6 +596,7 @@ public class SAMDataSource {
                 readProperties.getBAQCalculationMode(),
                 readProperties.getBAQQualityMode(),
                 readProperties.getRefReader(),
+                readProperties.getBQSRApplier(),
                 readProperties.defaultBaseQualities());
     }
 
@@ -660,9 +666,10 @@ public class SAMDataSource {
                                                         BAQ.CalculationMode cmode,
                                                         BAQ.QualityMode qmode,
                                                         IndexedFastaSequenceFile refReader,
+                                                        BaseRecalibration bqsrApplier,
                                                         byte defaultBaseQualities) {
-        if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
-            // only wrap if we are replacing the original qualitiies or using a default base quality
+        if (useOriginalBaseQualities || defaultBaseQualities >= 0)
+            // only wrap if we are replacing the original qualities or using a default base quality
             wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
 
         // NOTE: this (and other filtering) should be done before on-the-fly sorting
@@ -675,6 +682,9 @@ public class SAMDataSource {
         if (!noValidationOfReadOrder && enableVerification)
             wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
 
+        if (bqsrApplier != null)
+            wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
+
         if (cmode != BAQ.CalculationMode.OFF)
             wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 70fe43755..92c508f85 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -68,7 +68,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 if(prevLoc != null) {
                     for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {       
                         final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
-                        if( initialIntervals.overlaps( fakeLoc ) ) {
+                        if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
                             final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
                                     : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
                             isActiveList.add( isActiveProb );
@@ -89,7 +89,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
 
                 // Call the walkers isActive function for this locus and add them to the list to be integrated later
-                if( initialIntervals.overlaps( location ) ) {
+                if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
                     final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus )
                                                                                      : ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
                     isActiveList.add( isActiveProb );
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
index c7b90606c..64f1d08a8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -79,7 +79,7 @@ public class ContextCovariate implements StandardCovariate {
     }
 
     /**
-     * calculates the context of a base indenpendent of the covariate mode
+     * calculates the context of a base independent of the covariate mode
      *
      * @param bases           the bases in the read to build the context from
      * @param offset          the position in the read to calculate the context for
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
index f71bb03e5..1b62160a3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateKeySet.java
@@ -1,5 +1,7 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
 /**
  * The object temporarily held by a read that describes all of it's covariates. 
  * 
@@ -15,9 +17,9 @@ public class CovariateKeySet {
 
     private int nextCovariateIndex;
     
-    public final static String mismatchesCovariateName = "M";
-    public final static String insertionsCovariateName = "I";
-    public final static String  deletionsCovariateName = "D";
+    private static String mismatchesCovariateName = "M";
+    private static String insertionsCovariateName = "I";
+    private static String  deletionsCovariateName = "D";
 
     public CovariateKeySet(int readLength, int numberOfCovariates) {
         numberOfCovariates++;                                               // +1 because we are adding the mismatch covariate (to comply with the molten table format)
@@ -36,7 +38,30 @@ public class CovariateKeySet {
         transposeCovariateValues(deletionsKeySet,  covariate.getDeletions());
         nextCovariateIndex++;
     }
-    
+
+    public static RecalDataManager.BaseRecalibrationType getErrorModelFromString(final String modelString) {
+        if (modelString.equals(mismatchesCovariateName))
+            return RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION;
+        else if (modelString.equals(insertionsCovariateName))
+            return RecalDataManager.BaseRecalibrationType.BASE_INSERTION;
+        else if (modelString.equals(deletionsCovariateName))
+            return RecalDataManager.BaseRecalibrationType.BASE_DELETION;
+        throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
+    }
+
+    public Object[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
+        switch (errorModel) {
+            case BASE_SUBSTITUTION:
+                    return getMismatchesKeySet(readPosition);
+            case BASE_INSERTION:
+                    return getInsertionsKeySet(readPosition);
+            case BASE_DELETION:
+                    return getDeletionsKeySet(readPosition);
+            default:
+                    throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
+        }
+    }
+
     public Object[] getMismatchesKeySet(int readPosition) {
         return mismatchesKeySet[readPosition];
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java
index 26356a4a4..adfeef518 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java
@@ -34,7 +34,7 @@ public class BAQSamIterator implements StingSAMIterator {
             "cmode != null" ,
             "qmode != null"})
     public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
-        if ( cmode == BAQ.CalculationMode.OFF) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
+        if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
         if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
 
         this.refReader = refReader;
diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
index 68bf6dce8..7104b1edd 100644
--- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
@@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
@@ -150,13 +151,23 @@ public class FragmentUtils {
         final int numBases = firstReadStop + secondRead.getReadLength();
         final byte[] bases = new byte[numBases];
         final byte[] quals = new byte[numBases];
+        // BUGBUG: too verbose, clean this up.
+        final byte[] insertionQuals = new byte[numBases];
+        final byte[] deletionQuals = new byte[numBases];
         final byte[] firstReadBases = firstRead.getReadBases();
         final byte[] firstReadQuals = firstRead.getBaseQualities();
+        final byte[] firstReadInsertionQuals = firstRead.getBaseInsertionQualities();
+        final byte[] firstReadDeletionQuals = firstRead.getBaseDeletionQualities();
         final byte[] secondReadBases = secondRead.getReadBases();
         final byte[] secondReadQuals = secondRead.getBaseQualities();
+        final byte[] secondReadInsertionQuals = secondRead.getBaseInsertionQualities();
+        final byte[] secondReadDeletionQuals = secondRead.getBaseDeletionQualities();
+
         for(int iii = 0; iii < firstReadStop; iii++) {
             bases[iii] = firstReadBases[iii];
             quals[iii] = firstReadQuals[iii];
+            insertionQuals[iii] = firstReadInsertionQuals[iii];
+            deletionQuals[iii] = firstReadDeletionQuals[iii];
         }
         for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
             if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
@@ -164,16 +175,22 @@ public class FragmentUtils {
             }
             bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] );
             quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] );
+            insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
+            deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score
         }
         for(int iii = firstRead.getReadLength(); iii < numBases; iii++) {
             bases[iii] = secondReadBases[iii-firstReadStop];
             quals[iii] = secondReadQuals[iii-firstReadStop];
+            insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
+            deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
         }
 
         final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader());
         returnRead.setAlignmentStart(firstRead.getUnclippedStart());
         returnRead.setReadBases( bases );
-        returnRead.setBaseQualities( quals );
+        returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION );
+        returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
+        returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
         returnRead.setReadGroup( firstRead.getReadGroup() );
         returnRead.setReferenceName( firstRead.getReferenceName() );
         final CigarElement c = new CigarElement(bases.length, CigarOperator.M);
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java
new file mode 100644
index 000000000..048f8e58c
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java
@@ -0,0 +1,50 @@
+package org.broadinstitute.sting.utils.recalibration;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.Iterator;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 2/13/12
+ */
+
+public class BQSRSamIterator implements StingSAMIterator {
+    private final StingSAMIterator it;
+    private final BaseRecalibration bqsr;
+
+    /**
+     * Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads.
+     *
+     * @param it    The incoming SamIterator to wrap
+     * @param bqsr  The object which holds the BQSR table information and knows how to apply it
+     */
+    @Requires({
+            "it != null",
+            "bqsr != null"})
+    public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) {
+        if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object");
+
+        this.it = it;
+        this.bqsr = bqsr;
+    }
+
+    @Requires("hasNext()")
+    @Ensures("result != null")
+    public SAMRecord next()     {
+        SAMRecord read = it.next();
+        bqsr.recalibrateRead((GATKSAMRecord) read);
+        return read;
+    }
+
+    public boolean hasNext()    { return this.it.hasNext(); }
+    public void remove()        { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
+    public void close()         { it.close(); }
+    public Iterator<SAMRecord> iterator() { return this; }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index 2c1bc494a..b08365a78 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.text.XReadLines;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Pattern;
 
@@ -165,10 +166,8 @@ public class BaseRecalibration {
             key[iii] = cov.getValue( vals[iii] );
         }
         final String modelString = vals[iii++];
-        final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION :
-            ( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION :
-            ( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) );
-                
+        final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString);
+
         // Create a new datum using the number of observations, number of mismatches, and reported quality score
         final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
         // Add that datum to all the collapsed tables which will be used in the sequential calculation
@@ -183,19 +182,16 @@ public class BaseRecalibration {
         final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read );
 
         for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) {
-            final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() :
-                ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() :
-                ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) );
+            final byte[] originalQuals = read.getBaseQualities( errorModel );
             final byte[] recalQuals = originalQuals.clone();
 
             // For each base in the read
             for( int offset = 0; offset < read.getReadLength(); offset++ ) {
         
-                final Object[] fullCovariateKey =
-                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) :
-                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) :
-                        ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) );
-        
+                final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
+
+                final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
+
                 Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
                 if( qualityScore == null ) {
                     qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
@@ -206,21 +202,8 @@ public class BaseRecalibration {
             }
         
             preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
-            switch (errorModel) {
-                case BASE_SUBSTITUTION:
-                    read.setBaseQualities( recalQuals );
-                    break;
-                case BASE_INSERTION:
-                    read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals );
-                    break;
-                case BASE_DELETION:
-                    read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals );
-                    break;
-                default:
-                    throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
-            }
+            read.setBaseQualities( recalQuals, errorModel );
         }
-       
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index f6b3d759c..2172cfb94 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -25,9 +25,9 @@
 package org.broadinstitute.sting.utils.sam;
 
 import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
 import org.broadinstitute.sting.utils.NGSPlatform;
-import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.Arrays;
 import java.util.HashMap;
@@ -163,8 +163,37 @@ public class GATKSAMRecord extends BAMRecord {
     }
 
     /**
-     * Accessors for base insertion and base deletion quality scores
+     * Setters and Accessors for base insertion and base deletion quality scores
      */
+    public void setBaseQualities( final byte[] quals, final RecalDataManager.BaseRecalibrationType errorModel ) {
+        switch( errorModel ) {
+            case BASE_SUBSTITUTION:
+                setBaseQualities(quals);
+                break;
+            case BASE_INSERTION:
+                setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals );
+                break;
+            case BASE_DELETION:
+                setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals );
+                break;
+            default:
+                throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
+        }
+    }
+
+    public byte[] getBaseQualities( final RecalDataManager.BaseRecalibrationType errorModel ) {
+        switch( errorModel ) {
+            case BASE_SUBSTITUTION:
+                return getBaseQualities();
+            case BASE_INSERTION:
+                return getBaseInsertionQualities();
+            case BASE_DELETION:
+                return getBaseDeletionQualities();
+            default:
+                throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
+        }
+    }
+
     public byte[] getBaseInsertionQualities() {
         byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
         if( quals == null ) {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
index 5da8cebf4..20f3e1e35 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
@@ -79,7 +79,8 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
                                                                false,
                                                                BAQ.CalculationMode.OFF,
                                                                BAQ.QualityMode.DONT_MODIFY,
-                                                               null,
+                                                               null, // no BAQ
+                                                               null, // no BQSR
                                                                (byte)0);
 
             GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
index 4011594f3..04e11db54 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
@@ -308,6 +308,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
                 BAQ.CalculationMode.OFF,
                 BAQ.QualityMode.DONT_MODIFY,
                 null, // no BAQ
+                null, // no BQSR
                 (byte) -1
         );
     }

From e9338e2c2040fd1ff258992af5177d39e87058b7 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 13 Feb 2012 13:40:41 -0500
Subject: [PATCH 52/67] Context covariate needs to look in the reverse
 direction for negative stranded reads.

---
 .../gatk/walkers/bqsr/ContextCovariate.java   | 34 ++++++++++++++++---
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
index 64f1d08a8..89a30e4f5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -25,6 +25,7 @@
 
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 
+import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
@@ -68,16 +69,31 @@ public class ContextCovariate implements StandardCovariate {
         String[] mismatches = new String [l];
         String[] insertions = new String [l];
         String[]  deletions = new String [l];
-        
+
+        final boolean negativeStrand = read.getReadNegativeStrandFlag();
         byte[] bases = read.getReadBases();
+        if (negativeStrand) {
+            bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
+        }
         for (int i = 0; i < read.getReadLength(); i++) {
             mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
             insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
             deletions[i]  = contextWith(bases, i,  deletionsContextSize,  deletionsNoContext);
         }
+        if (negativeStrand) {
+            reverse(mismatches);
+            reverse(insertions);
+            reverse(deletions);
+        }
         return new CovariateValues(mismatches, insertions, deletions);
     }
 
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Comparable getValue(final String str) {
+        return str;
+    }
+
     /**
      * calculates the context of a base independent of the covariate mode
      *
@@ -98,9 +114,17 @@ public class ContextCovariate implements StandardCovariate {
         return s;
     }
 
-    // Used to get the covariate's value from input csv file during on-the-fly recalibration
-    @Override
-    public final Comparable getValue(final String str) {
-        return str;
+    /**
+     * Reverses the given array in place.
+     *
+     * @param array any array
+     */
+    private static void reverse(final Comparable[] array) {
+        final int arrayLength = array.length;
+        for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
+            final Comparable temp = array[l];
+            array[l] = array[r];
+            array[r] = temp;
+        }
     }
 }

From 14981bed10fe519f60a58305bdf71d59f802b123 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 13 Feb 2012 14:32:03 -0500
Subject: [PATCH 53/67] Cleaning up VariantsToTable: added docs for supported
 fields; removed one-off hidden arguments for multi-allelics; default behavior
 is now to include multi-allelics in one record; added option to split
 multi-allelics into separate records.

---
 .../walkers/variantutils/VariantsToTable.java | 117 +++++++++---------
 .../VariantsToTableIntegrationTest.java       |   4 +-
 2 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index e43d54e14..9f4718ef2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
 
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
-import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@@ -49,7 +48,13 @@ import java.util.*;
  * fields to print with the -F NAME, each of which appears as a single column in
  * the output file, with a header named NAME, and the value of this field in the VCF
  * one per line.  NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding
- * in the INFO field (AC=10).  Note that this tool does not support capturing any
+ * in the INFO field (AC=10).  In addition, there are specially supported values like
+ * EVENTLENGTH (length of the event), TRANSITION (for SNPs), HET (count of het genotypes),
+ * HOM-REF (count of homozygous reference genotypes), HOM-VAR (count of homozygous variant
+ * genotypes), NO-CALL (count of no-call genotypes), TYPE (the type of event), VAR (count of
+ * non-reference genotypes), NSAMPLES (number of samples), NCALLED (number of called samples),
+ * GQ (from the genotype field; works only for a file with a single sample), and MULTI-ALLELIC
+ * (is the record from a multi-allelic site).  Note that this tool does not support capturing any
  * GENOTYPE field values.  If a VCF record is missing a value, then the tool by
  * default throws an error, but the special value NA can be emitted instead with
  * appropriate tool arguments.
@@ -121,18 +126,13 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
     int nRecords = 0;
 
     /**
-     * By default, only biallelic (REF=A, ALT=B) sites are including in the output.  If this flag is provided, then
-     * VariantsToTable will emit field values for records with multiple ALT alleles.  Note that in general this
-     * can make your resulting file unreadable and malformated according to tools like R, as the representation of
-     * multi-allelic INFO field values can be lists of values.
+     * By default, records with multiple ALT alleles will comprise just one line of output; note that in general this can make your resulting file
+     * unreadable/malformed for certain tools like R, as the representation of multi-allelic INFO field values are often comma-separated lists
+     * of values.  Using the flag will cause multi-allelic records to be split into multiple lines of output (one for each allele in the ALT field);
+     * INFO field values that are not lists are copied for each of the output records while only the appropriate entry is used for lists.
      */
-    @Advanced
-    @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
-    public boolean keepMultiAllelic = false;
-
-    @Hidden
-    @Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false)
-    public boolean logACSum = false;
+    @Argument(fullName="splitMultiAllelic", shortName="SMA", doc="If provided, we will split multi-allelic records into multiple lines of output", required=false)
+    public boolean splitMultiAllelic = false;
 
     /**
      * By default, this tool throws a UserException when it encounters a field without a value in some record.  This
@@ -144,6 +144,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
     @Advanced
     @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false)
     public boolean ALLOW_MISSING_DATA = false;
+    private final static String MISSING_DATA = "NA";
 
     public void initialize() {
         // print out the header
@@ -155,9 +156,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
             return 0;
 
         for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) {
-            if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) {
-                List<String> vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA, keepMultiAllelic, logACSum);
-                out.println(Utils.join("\t", vals));
+            if ( showFiltered || vc.isNotFiltered() ) {
+                for ( final List<String> record : extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA, splitMultiAllelic) )
+                    out.println(Utils.join("\t", record));
             }
         }
         
@@ -180,22 +181,23 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
      *
      * @param vc the VariantContext whose field values we can to capture
      * @param fields a non-null list of fields to capture from VC
-     * @param allowMissingData if false, then throws a UserException if any field isn't found in vc.  Otherwise
-     *   provides a value of NA
-     *   @param kma if true, multiallelic variants are to be kept
-     *   @param logsum if true, AF and AC are computed based on sum of allele counts. Otherwise, based on allele with highest count.
-     * @return
+     * @param allowMissingData if false, then throws a UserException if any field isn't found in vc.  Otherwise provides a value of NA
+     * @param splitMultiAllelic  if true, multiallelic variants are to be split into multiple records
+     * @return List of lists of field values
      */
-    private static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData, boolean kma, boolean logsum) {
-        List<String> vals = new ArrayList<String>();
+    private static List<List<String>> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData, boolean splitMultiAllelic) {
+        
+        final int numRecordsToProduce = splitMultiAllelic ? vc.getAlternateAlleles().size() : 1;
+        final List<List<String>> records = new ArrayList<List<String>>(numRecordsToProduce);
+        for ( int i = 0; i < numRecordsToProduce; i++ )
+            records.add(new ArrayList<String>(fields.size()));
 
         for ( String field : fields ) {
-            String val = "NA";
 
             if ( getters.containsKey(field) ) {
-                val = getters.get(field).get(vc);
+                addFieldValue(getters.get(field).get(vc), records);
             } else if ( vc.hasAttribute(field) ) {
-                val = vc.getAttributeAsString(field, null);
+                addFieldValue(vc.getAttribute(field, null), records);
             } else if ( isWildCard(field) ) {
                 Set<String> wildVals = new HashSet<String>();
                 for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {
@@ -204,51 +206,47 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
                     }
                 }
 
+                String val = MISSING_DATA;
                 if ( wildVals.size() > 0 ) {
                     List<String> toVal = new ArrayList<String>(wildVals);
                     Collections.sort(toVal);
                     val = Utils.join(",", toVal);
                 }
+
+                addFieldValue(val, records);
             } else if ( ! allowMissingData ) {
                 throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
+            } else {
+                addFieldValue(MISSING_DATA, records);
             }
-
-            if (field.equals("AF") || field.equals("AC")) {
-                     String afo = val;
-
-                     double af=0;
-                     if (afo.contains(",")) {
-                         String[] afs = afo.split(",");
-                         afs[0] = afs[0].substring(1,afs[0].length());
-                         afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
-
-                         double[] afd = new double[afs.length];
-
-                         for (int k=0; k < afd.length; k++)
-                             afd[k] = Double.valueOf(afs[k]);
-
-                         if (kma && logsum)
-                             af = MathUtils.sum(afd);
-                         else
-                         af = MathUtils.arrayMax(afd);
-                         //af = Double.valueOf(afs[0]);
-
-                     }
-                     else
-                         if (!afo.equals("NA"))
-                             af = Double.valueOf(afo);
-
-                val = Double.toString(af);
-
-            }
-            vals.add(val);
         }
 
-        return vals;
+        return records;
     }
 
-    public static List<String> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
-        return extractFields(vc, fields, allowMissingData, false, false);
+    private static void addFieldValue(Object val, List<List<String>> result) {
+        final int numResultRecords = result.size();
+        
+        // if we're trying to create a single output record, add it
+        if ( numResultRecords == 1 ) {
+            result.get(0).add(val.toString());
+        }
+        // if this field is a list of the proper size, add the appropriate entry to each record
+        else if ( (val instanceof List) && ((List)val).size() == numResultRecords ) {
+            final List list = (List)val;
+            for ( int i = 0; i < numResultRecords; i++ )
+                result.get(i).add(list.get(i).toString());
+        }
+        // otherwise, add the original value to all of the records
+        else {
+            final String valStr = val.toString();
+            for ( List<String> record : result )
+                record.add(valStr);
+        }
+    }
+
+    public static List<List<String>> extractFields(VariantContext vc, List<String> fields, boolean allowMissingData) {
+        return extractFields(vc, fields, allowMissingData, false);
     }
     //
     // default reduce -- doesn't do anything at all
@@ -321,6 +319,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
         getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
         getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
         getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
+        getters.put("MULTI-ALLELIC", new Getter() { public String get(VariantContext vc) { return Boolean.toString(vc.getAlternateAlleles().size() > 1); } });
         getters.put("GQ", new Getter() { public String get(VariantContext vc) {
             if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF");
             return String.format("%.2f", -10 * vc.getGenotype(0).getLog10PError());
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
index 19021c1c2..0ab593e7a 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
@@ -27,10 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
 import org.broadinstitute.sting.WalkerTest;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.testng.annotations.Test;
-import org.testng.annotations.DataProvider;
 
 import java.util.*;
-import java.io.File;
 
 public class VariantsToTableIntegrationTest extends WalkerTest {
     private String variantsToTableCmd(String moreArgs) {
@@ -38,7 +36,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
                 " --variant:vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" +
                 " -T VariantsToTable" +
                 " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" +
-                " -L chr1 -KMA -o %s" + moreArgs;
+                " -L chr1 -o %s" + moreArgs;
     }
 
     @Test(enabled = true)

From 0920a1921eb02d3d6000dd380086732dd635cfc6 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 13 Feb 2012 15:09:53 -0500
Subject: [PATCH 54/67] Minor fixes to splitting multi-allelic records (as
 regards printing indel alleles correctly); minor code refactoring; adding
 integration tests to cover +/- splitting multi-allelics.

---
 .../walkers/variantutils/VariantsToTable.java | 30 ++++++++++++++-----
 .../VariantsToTableIntegrationTest.java       | 22 ++++++++++++++
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
index 9f4718ef2..4c8e8df5c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java
@@ -194,7 +194,9 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
 
         for ( String field : fields ) {
 
-            if ( getters.containsKey(field) ) {
+            if ( splitMultiAllelic && field.equals("ALT") ) { // we need to special case the ALT field when splitting out multi-allelic records
+                addFieldValue(splitAltAlleles(vc), records);
+            } else if ( getters.containsKey(field) ) {
                 addFieldValue(getters.get(field).get(vc), records);
             } else if ( vc.hasAttribute(field) ) {
                 addFieldValue(vc.getAttribute(field, null), records);
@@ -271,9 +273,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
         getters.put("REF", new Getter() {
             public String get(VariantContext vc) {
                 StringBuilder x = new StringBuilder();
-                if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
-                    x.append((char)vc.getReferenceBaseForIndel().byteValue());
-                x.append(vc.getReference().getDisplayString());
+                x.append(getAlleleDisplayString(vc, vc.getReference()));
                 return x.toString();
             }
         });
@@ -285,9 +285,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
 
                 for ( int i = 0; i < n; i++ ) {
                     if ( i != 0 ) x.append(",");
-                    if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
-                        x.append((char)vc.getReferenceBaseForIndel().byteValue());
-                    x.append(vc.getAlternateAllele(i).getDisplayString());
+                    x.append(getAlleleDisplayString(vc, vc.getAlternateAllele(i)));
                 }
                 return x.toString();
             }
@@ -325,5 +323,23 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
             return String.format("%.2f", -10 * vc.getGenotype(0).getLog10PError());
         }});
     }
+    
+    private static String getAlleleDisplayString(VariantContext vc, Allele allele) {
+        StringBuilder sb = new StringBuilder();
+        if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
+            sb.append((char)vc.getReferenceBaseForIndel().byteValue());
+        sb.append(allele.getDisplayString());
+        return sb.toString();
+    }
+    
+    private static Object splitAltAlleles(VariantContext vc) {
+        final int numAltAlleles = vc.getAlternateAlleles().size();
+        if ( numAltAlleles == 1 )
+            return getAlleleDisplayString(vc, vc.getAlternateAllele(0));
 
+        final List<String> alleles = new ArrayList<String>(numAltAlleles);
+        for ( Allele allele : vc.getAlternateAlleles() )
+            alleles.add(getAlleleDisplayString(vc, allele));
+        return alleles;
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
index 0ab593e7a..6188f2255 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java
@@ -39,6 +39,14 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
                 " -L chr1 -o %s" + moreArgs;
     }
 
+    private String variantsToTableMultiAllelicCmd(String moreArgs) {
+        return "-R " + b37KGReference +
+                " --variant " + validationDataLocation + "/multiallelic.vcf" +
+                " -T VariantsToTable" +
+                " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
+                " -o %s" + moreArgs;
+    }
+
     @Test(enabled = true)
     public void testComplexVariantsToTable() {
         WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(" -AMD"),
@@ -51,4 +59,18 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
         WalkerTestSpec spec = new WalkerTestSpec(variantsToTableCmd(""), 1, UserException.class);
         executeTest("testComplexVariantsToTable-FAIL", spec);
     }
+
+    @Test(enabled = true)
+    public void testMultiAllelicOneRecord() {
+        WalkerTestSpec spec = new WalkerTestSpec(variantsToTableMultiAllelicCmd(""),
+                Arrays.asList("13dd36c08be6c800f23988e6000d963e"));
+        executeTest("testMultiAllelicOneRecord", spec).getFirst();
+    }
+
+    @Test(enabled = true)
+    public void testMultiAllelicSplitRecords() {
+        WalkerTestSpec spec = new WalkerTestSpec(variantsToTableMultiAllelicCmd(" -SMA"),
+                Arrays.asList("17a0fc80409d2fc00ad2bbb94b3a346b"));
+        executeTest("testMultiAllelicSplitRecords", spec).getFirst();
+    }
 }

From 03e5184741945e5ffc89348dc2ba8c976858b11c Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 13 Feb 2012 13:40:58 -0500
Subject: [PATCH 56/67] Fix serious engine bug that could cause reads to be
 dropped under certain circumstances

When aggregating raw BAM file spans into shards, the IntervalSharder tries to combine
file spans when it can. Unfortunately, the method that combines two BAM file
spans was seriously flawed, and would produce a truncated union if the file spans
overlapped in certain ways. This could cause entire regions of the BAM file containing
reads within the requested intervals to be dropped.

Modified GATKBAMFileSpan.union() to correct this problem, and added unit tests
to verify that the correct union is produced regardless of how the file spans
happen to overlap.

Thanks to Khalid, who did at least as much work on this bug as I did.
---
 .../src/net/sf/samtools/GATKBAMFileSpan.java  | 10 ++-
 .../java/src/net/sf/samtools/GATKChunk.java   | 12 ++++
 .../sf/samtools/GATKBAMFileSpanUnitTest.java  | 70 ++++++++++++++++++-
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/public/java/src/net/sf/samtools/GATKBAMFileSpan.java b/public/java/src/net/sf/samtools/GATKBAMFileSpan.java
index 4692c6671..ffc40067a 100644
--- a/public/java/src/net/sf/samtools/GATKBAMFileSpan.java
+++ b/public/java/src/net/sf/samtools/GATKBAMFileSpan.java
@@ -143,10 +143,14 @@ public class GATKBAMFileSpan extends BAMFileSpan {
         List<GATKChunk> mergedUnion = new ArrayList<GATKChunk>();
         GATKChunk currentChunk = unmergedUnion.remove();
         while(!unmergedUnion.isEmpty()) {
-            // Find the end of this range of chunks.
-            while(!unmergedUnion.isEmpty() && currentChunk.getChunkEnd() >= unmergedUnion.peek().getChunkStart()) {
+
+            // While the current chunk can be merged with the next chunk:
+            while( ! unmergedUnion.isEmpty() &&
+                   (currentChunk.overlaps(unmergedUnion.peek()) || currentChunk.isAdjacentTo(unmergedUnion.peek())) ) {
+
+                // Merge the current chunk with the next chunk:
                 GATKChunk nextChunk = unmergedUnion.remove();
-                currentChunk = new GATKChunk(currentChunk.getChunkStart(),nextChunk.getChunkEnd());
+                currentChunk = currentChunk.merge(nextChunk);
             }
             // Add the accumulated range.
             mergedUnion.add(currentChunk);
diff --git a/public/java/src/net/sf/samtools/GATKChunk.java b/public/java/src/net/sf/samtools/GATKChunk.java
index 5d349e72e..c48567f6e 100644
--- a/public/java/src/net/sf/samtools/GATKChunk.java
+++ b/public/java/src/net/sf/samtools/GATKChunk.java
@@ -96,4 +96,16 @@ public class GATKChunk extends Chunk {
         final int offsetSpan = (int)((getChunkEnd()&0xFFFF)-(getChunkStart()&0xFFFF));
         return chunkSpan + offsetSpan;
     }
+
+    /**
+     * Merges two chunks together. The caller is responsible for testing whether the
+     * chunks overlap/are adjacent before calling this method!
+     *
+     * @param other the chunk to merge with this chunk
+     * @return a new chunk representing the union of the two chunks (provided the chunks were
+     *         overlapping/adjacent)
+     */
+    public GATKChunk merge ( GATKChunk other ) {
+        return new GATKChunk(Math.min(getChunkStart(), other.getChunkStart()), Math.max(getChunkEnd(), other.getChunkEnd()));
+    }
 }
diff --git a/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java b/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java
index a9586c3c8..dbb4ea225 100644
--- a/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java
+++ b/public/java/test/net/sf/samtools/GATKBAMFileSpanUnitTest.java
@@ -50,7 +50,10 @@ public class GATKBAMFileSpanUnitTest {
     }
 
     @Test
-    public void testUnionOfOverlappingFileSpans() {
+    public void testUnionOfContiguousFileSpans() {
+        // Region 1 ends at position adjacent to Region 2 start:
+        // |---1----|---2----|
+
         GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,1<<16));
         GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
         GATKBAMFileSpan union = regionOne.union(regionTwo);
@@ -58,6 +61,71 @@ public class GATKBAMFileSpanUnitTest {
         Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535));
     }
 
+    @Test
+    public void testUnionOfFileSpansFirstRegionEndsWithinSecondRegion() {
+        // Region 1 ends within Region 2:
+        //        |---2----|
+        // |---1----|
+
+        GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|32767));
+        GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
+        GATKBAMFileSpan union = regionOne.union(regionTwo);
+        Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
+        Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535));
+    }
+
+    @Test
+    public void testUnionOfFileSpansFirstRegionEndsAtSecondRegionEnd() {
+        // Region 1 ends at Region 2 end:
+        //        |---2----|
+        // |---1-----------|
+
+        GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|65535));
+        GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
+        GATKBAMFileSpan union = regionOne.union(regionTwo);
+        Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
+        Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535));
+    }
+
+    @Test
+    public void testUnionOfFileSpansFirstRegionEndsAfterSecondRegionEnd() {
+        // Region 1 ends after Region 2 end:
+        //        |---2----|
+        // |---1---------------|
+
+        GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(0,(1<<16)|65535));
+        GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|32767));
+        GATKBAMFileSpan union = regionOne.union(regionTwo);
+        Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
+        Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(0,(1<<16)|65535));
+    }
+
+    @Test
+    public void testUnionOfFileSpansFirstRegionStartsAtSecondRegionStart() {
+        // Region 1 starts at Region 2 start, but ends before Region 2:
+        // |---2--------|
+        // |---1----|
+
+        GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|32767));
+        GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
+        GATKBAMFileSpan union = regionOne.union(regionTwo);
+        Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
+        Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(1<<16,(1<<16)|65535));
+    }
+
+    @Test
+    public void testUnionOfFileSpansFirstRegionEqualToSecondRegion() {
+        // Region 1 and Region 2 represent the same region:
+        // |---2----|
+        // |---1----|
+
+        GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
+        GATKBAMFileSpan regionTwo = new GATKBAMFileSpan(new GATKChunk(1<<16,(1<<16)|65535));
+        GATKBAMFileSpan union = regionOne.union(regionTwo);
+        Assert.assertEquals(union.getGATKChunks().size(),1,"Elements to be merged were not.");
+        Assert.assertEquals(union.getGATKChunks().get(0),new GATKChunk(1<<16,(1<<16)|65535));
+    }
+
     @Test
     public void testUnionOfStringOfFileSpans() {
         GATKBAMFileSpan regionOne = new GATKBAMFileSpan(new GATKChunk[] { new GATKChunk(0,1<<16), new GATKChunk(2<<16,3<<16) });

From ae5b42c88456c8f0b93bfd737ae636f6d40b3537 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Tue, 14 Feb 2012 14:01:04 -0500
Subject: [PATCH 60/67] Put base insertion and base deletions in the SAMRecord
 as a string of quality scores instead of an array of bytes. Start of a proper
 genotype given alleles mode in HaplotypeCaller

---
 .../sting/gatk/walkers/bqsr/RecalDataManager.java  |  1 +
 .../org/broadinstitute/sting/utils/Haplotype.java  |  4 ++++
 .../utils/recalibration/BaseRecalibration.java     |  6 ++----
 .../sting/utils/sam/GATKSAMRecord.java             | 14 ++++++--------
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
index 8a255391f..cc60ac010 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@@ -58,6 +58,7 @@ public class RecalDataManager {
     private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore;              // Table where everything except read group and quality score has been collapsed
     private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate;    // Tables where everything except read group, quality score, and given covariate has been collapsed
 
+    public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
     public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ";   // The tag that holds the color space quality scores for SOLID bams
     public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS";        // The tag that holds the color space for SOLID bams
     public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC";    // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index df682f215..e10a810fd 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -109,6 +109,10 @@ public class Haplotype {
         return isReference;
     }
 
+    public byte[] insertAllele( final Allele a ) {
+        return getBases();
+    }
+
     public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
                                                                final int haplotypeSize, final int numPrefBases) {
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index b08365a78..4a366bc02 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
 import org.broadinstitute.sting.utils.collections.NestedHashMap;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.text.XReadLines;
@@ -189,13 +188,12 @@ public class BaseRecalibration {
             for( int offset = 0; offset < read.getReadLength(); offset++ ) {
         
                 final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
-
                 final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
 
-                Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
+                Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
                 if( qualityScore == null ) {
                     qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
-                    qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
+                    qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
                 }
         
                 recalQuals[offset] = qualityScore;
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 2172cfb94..f5a9b2f45 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -171,10 +171,10 @@ public class GATKSAMRecord extends BAMRecord {
                 setBaseQualities(quals);
                 break;
             case BASE_INSERTION:
-                setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals );
+                setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) );
                 break;
             case BASE_DELETION:
-                setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals );
+                setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) );
                 break;
             default:
                 throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
@@ -195,23 +195,23 @@ public class GATKSAMRecord extends BAMRecord {
     }
 
     public byte[] getBaseInsertionQualities() {
-        byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES );
+        byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
         if( quals == null ) {
             quals = new byte[getBaseQualities().length];
             Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
             // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
-            setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
+            setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
         }
         return quals;
     }
 
     public byte[] getBaseDeletionQualities() {
-        byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES );
+        byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
         if( quals == null ) {
             quals = new byte[getBaseQualities().length];
             Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
             // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
-            setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
+            setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
         }
         return quals;
     }
@@ -259,12 +259,10 @@ public class GATKSAMRecord extends BAMRecord {
         return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
     }
 
-
     ///////////////////////////////////////////////////////////////////////////////
     // *** GATKSAMRecord specific methods                                     ***//
     ///////////////////////////////////////////////////////////////////////////////
 
-
     /**
      * Checks whether an attribute has been set for the given key.
      *

From 7dc6f733995760369fd6693b20328728ebb4e3f1 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Tue, 14 Feb 2012 21:11:24 -0500
Subject: [PATCH 61/67] Bug fix for validation site selector: records with AC=0
 in them were always being thrown out if input vcf was sites-only, even when
 -ignorePolymorphicStatus flag was set

---
 .../UniformSamplingFrequencySelector.java         | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
index 66720a252..eda75d647 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java
@@ -52,13 +52,16 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
             if (! selectedInTargetSamples && !IGNORE_POLYMORPHIC)
                 return;
         } else  {
-            if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )  {
-                int ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
-                if (ac == 0) return; // site not polymorphic
+            if (!IGNORE_POLYMORPHIC) {
+                if (vc.getAttributes().containsKey(VCFConstants.ALLELE_COUNT_KEY))
+                {
+                    int ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
+                    if (ac == 0) return; // site not polymorphic
+                }
+                else
+                    // no allele count field in VC
+                    return;
             }
-            else
-                return;
-
         }
         // create bare-bones event and log in corresponding bin
         // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes

From cd352f502d8226ed29851dca9c928bfc70355ce8 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Fri, 17 Feb 2012 10:21:37 -0500
Subject: [PATCH 62/67] Corner case bug fix: if a read starts with an
 insertion, when computing the consensus allele for calling the insertion was
 only added to the last element in the consensus key hash map. Now, an
 insertion that partially overlaps with several candidate alleles will have
 their respective count increased for all of them

---
 .../IndelGenotypeLikelihoodsCalculationModel.java    | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index fe2086d47..6321ef1f6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -152,15 +152,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 // case 1: current insertion is prefix of indel in hash map
                                 consensusIndelStrings.put(s,cnt+1);
                                 foundKey = true;
-                                break;
-                            }
+                             }
                             else if (indelString.startsWith(s)) {
                                 // case 2: indel stored in hash table is prefix of current insertion
                                 // In this case, new bases are new key.
                                 consensusIndelStrings.remove(s);
                                 consensusIndelStrings.put(indelString,cnt+1);
                                 foundKey = true;
-                                break;
                             }
                         }
                         if (!foundKey)
@@ -176,8 +174,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 // case 1: current insertion is suffix of indel in hash map
                                 consensusIndelStrings.put(s,cnt+1);
                                 foundKey = true;
-                                break;
-                            }
+                             }
                             else if (indelString.endsWith(s)) {
                                 // case 2: indel stored in hash table is suffix of current insertion
                                 // In this case, new bases are new key.
@@ -185,7 +182,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 consensusIndelStrings.remove(s);
                                 consensusIndelStrings.put(indelString,cnt+1);
                                 foundKey = true;
-                                break;
                             }
                         }
                         if (!foundKey)
@@ -233,9 +229,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                 maxAlleleCnt = curCnt;
                 bestAltAllele = s;
             }
-//            if (DEBUG)
-//                System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s)  );
-        }         //gdebug-
+        }
 
         if (maxAlleleCnt <  minIndelCountForGenotyping)
             return aList;

From f2ef8d1d2342cd4b6d80283587d6db7931d146ec Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Fri, 17 Feb 2012 17:15:53 -0500
Subject: [PATCH 63/67] Reverting last commit until I learn how to effectively
 replicate and debug pipeline test failures, and until I also learn how to
 effectively remove a kep from a HashMap that's being iterated on

---
 .../IndelGenotypeLikelihoodsCalculationModel.java    | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 6321ef1f6..fe2086d47 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -152,13 +152,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 // case 1: current insertion is prefix of indel in hash map
                                 consensusIndelStrings.put(s,cnt+1);
                                 foundKey = true;
-                             }
+                                break;
+                            }
                             else if (indelString.startsWith(s)) {
                                 // case 2: indel stored in hash table is prefix of current insertion
                                 // In this case, new bases are new key.
                                 consensusIndelStrings.remove(s);
                                 consensusIndelStrings.put(indelString,cnt+1);
                                 foundKey = true;
+                                break;
                             }
                         }
                         if (!foundKey)
@@ -174,7 +176,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 // case 1: current insertion is suffix of indel in hash map
                                 consensusIndelStrings.put(s,cnt+1);
                                 foundKey = true;
-                             }
+                                break;
+                            }
                             else if (indelString.endsWith(s)) {
                                 // case 2: indel stored in hash table is suffix of current insertion
                                 // In this case, new bases are new key.
@@ -182,6 +185,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                                 consensusIndelStrings.remove(s);
                                 consensusIndelStrings.put(indelString,cnt+1);
                                 foundKey = true;
+                                break;
                             }
                         }
                         if (!foundKey)
@@ -229,7 +233,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                 maxAlleleCnt = curCnt;
                 bestAltAllele = s;
             }
-        }
+//            if (DEBUG)
+//                System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s)  );
+        }         //gdebug-
 
         if (maxAlleleCnt <  minIndelCountForGenotyping)
             return aList;

From 78718b8d6a88de69a7537769ca5b4c63bcac8169 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sat, 18 Feb 2012 10:31:26 -0500
Subject: [PATCH 64/67] Adding Genotype Given Alleles mode to the
 HaplotypeCaller. It constructs the possible haplotypes via assembly and then
 injects the desired allele to be genotyped.

---
 .../traversals/TraverseActiveRegions.java     |   3 +-
 .../broadinstitute/sting/utils/Haplotype.java | 134 +++++++++++++++++-
 2 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 92c508f85..3f24e6585 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -69,8 +69,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                     for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {       
                         final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
                         if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
-                            final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
-                                    : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
+                            final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
                             isActiveList.add( isActiveProb );
                             if( firstIsActiveStart == null ) {
                                 firstIsActiveStart = fakeLoc;
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index e10a810fd..d48deab1b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -24,11 +24,17 @@
 
 package org.broadinstitute.sting.utils;
 
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 
@@ -109,8 +115,52 @@ public class Haplotype {
         return isReference;
     }
 
-    public byte[] insertAllele( final Allele a ) {
-        return getBases();
+    public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
+                                final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
+        
+        if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
+        int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
+        if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
+            return getBases().clone();
+        }
+        haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
+        final byte[] newHaplotype = getBases().clone();
+
+        try {
+            if( refAllele.length() == altAllele.length() ) { // SNP or MNP
+                for( int iii = 0; iii < altAllele.length(); iii++ ) {
+                    newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
+                }
+            } else if( refAllele.length() < altAllele.length() ) { // insertion
+                final int altAlleleLength = altAllele.length();
+                for( int iii = newHaplotype.length -1; iii > haplotypeInsertLocation + altAlleleLength; iii-- ) {
+                    newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
+                }
+                for( int iii = 0; iii < altAlleleLength; iii++ ) {
+                    newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
+                }
+            } else { // deletion
+                int refHaplotypeOffset = 0;
+                for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
+                    if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
+                    else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
+                }
+                for( int iii = 0; iii < altAllele.length(); iii++ ) {
+                    newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
+                }
+                final int shift = refAllele.length() - altAllele.length();
+                for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
+                    newHaplotype[iii] = newHaplotype[iii+shift];
+                }
+                for( int iii = 0; iii < shift; iii++ ) {
+                    newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
+                }
+            }
+        } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
+            return getBases().clone();
+        }
+        
+        return newHaplotype;
     }
 
     public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
@@ -169,4 +219,84 @@ public class Haplotype {
         return haplotypeMap;
     }
 
+    private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
+        int readBases = 0;
+        int refBases = 0;
+        boolean fallsInsideDeletion = false;
+
+        int goal = refCoord - haplotypeStart;  // The goal is to move this many reference bases
+        boolean goalReached = refBases == goal;
+
+        Iterator<CigarElement> cigarElementIterator = haplotypeCigar.getCigarElements().iterator();
+        while (!goalReached && cigarElementIterator.hasNext()) {
+            CigarElement cigarElement = cigarElementIterator.next();
+            int shift = 0;
+
+            if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
+                if (refBases + cigarElement.getLength() < goal)
+                    shift = cigarElement.getLength();
+                else
+                    shift = goal - refBases;
+
+                refBases += shift;
+            }
+            goalReached = refBases == goal;
+
+            if (!goalReached && cigarElement.getOperator().consumesReadBases())
+                readBases += cigarElement.getLength();
+
+            if (goalReached) {
+                // Is this base's reference position within this cigar element? Or did we use it all?
+                boolean endsWithinCigar = shift < cigarElement.getLength();
+
+                // If it isn't, we need to check the next one. There should *ALWAYS* be a next one
+                // since we checked if the goal coordinate is within the read length, so this is just a sanity check.
+                if (!endsWithinCigar && !cigarElementIterator.hasNext())
+                    return -1;
+
+                CigarElement nextCigarElement;
+
+                // if we end inside the current cigar element, we just have to check if it is a deletion
+                if (endsWithinCigar)
+                    fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
+
+                    // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
+                else {
+                    nextCigarElement = cigarElementIterator.next();
+
+                    // if it's an insertion, we need to clip the whole insertion before looking at the next element
+                    if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
+                        readBases += nextCigarElement.getLength();
+                        if (!cigarElementIterator.hasNext())
+                            return -1;
+
+                        nextCigarElement = cigarElementIterator.next();
+                    }
+
+                    // if it's a deletion, we will pass the information on to be handled downstream.
+                    fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
+                }
+
+                // If we reached our goal outside a deletion, add the shift
+                if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
+                    readBases += shift;
+
+                    // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
+                    // to add the shift of the current cigar element but go back to it's last element to return the last
+                    // base before the deletion (see warning in function contracts)
+                else if (fallsInsideDeletion && !endsWithinCigar)
+                    readBases += shift - 1;
+
+                    // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
+                else if (fallsInsideDeletion && endsWithinCigar)
+                    readBases--;
+            }
+        }
+
+        if (!goalReached)
+            return -1;
+
+        return (fallsInsideDeletion ? -1 : readBases);
+    }
+
 }

From a8be96f63dbc5545c516ada8acd96bf05b59904d Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sat, 18 Feb 2012 10:54:39 -0500
Subject: [PATCH 65/67] This caching in the BQSR seems to be too slow now that
 there are so many keys

---
 .../sting/utils/recalibration/BaseRecalibration.java  | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
index 4a366bc02..74083ced2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@@ -190,11 +190,12 @@ public class BaseRecalibration {
                 final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
                 final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
 
-                Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
-                if( qualityScore == null ) {
-                    qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
-                    qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
-                }
+                // BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
+                //Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
+                //if( qualityScore == null ) {
+                    final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
+                //    qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
+                //}
         
                 recalQuals[offset] = qualityScore;
             }

From 0f5674b95e9dd18df55cf09dbdd79f8bc9ec46fa Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Mon, 20 Feb 2012 09:12:51 -0500
Subject: [PATCH 67/67] Redid fix for corner case when forming consensus with
 reads that start/end with insertions and that don't agree with each other in
 inserted bases: since I can't iterate over the elements of a HashMap because
 keys might change during iteration, and since I can't use ConcurrentHashMaps,
 the code now copies structure of (bases, number of times seen) into
 ArrayList, which can be addressed by element index in order to iterate on it.

---
 ...elGenotypeLikelihoodsCalculationModel.java | 77 +++++++++++--------
 1 file changed, 46 insertions(+), 31 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index 49c131ce2..7ee7b0752 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
@@ -141,62 +142,76 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                 String indelString = p.getEventBases();
                 if (p.isInsertion()) {
                     boolean foundKey = false;
+                    // copy of hashmap into temp arrayList
+                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
+                    for (String s : consensusIndelStrings.keySet()) {
+                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
+                    }
+
                     if (read.getAlignmentEnd() == loc.getStart()) {
                         // first corner condition: a read has an insertion at the end, and we're right at the insertion.
                         // In this case, the read could have any of the inserted bases and we need to build a consensus
-                        for (String s : consensusIndelStrings.keySet()) {
-                            int cnt = consensusIndelStrings.get(s);
+
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            // case 1: current insertion is prefix of indel in hash map
                             if (s.startsWith(indelString)) {
-                                // case 1: current insertion is prefix of indel in hash map
-                                consensusIndelStrings.put(s, cnt + 1);
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
                                 foundKey = true;
-                                break;
-                            } else if (indelString.startsWith(s)) {
+                            }
+                            else if (indelString.startsWith(s)) {
                                 // case 2: indel stored in hash table is prefix of current insertion
                                 // In this case, new bases are new key.
-                                consensusIndelStrings.remove(s);
-                                consensusIndelStrings.put(indelString, cnt + 1);
                                 foundKey = true;
-                                break;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
                             }
                         }
                         if (!foundKey)
                             // none of the above: event bases not supported by previous table, so add new key
-                            consensusIndelStrings.put(indelString, 1);
+                            cList.add(new Pair<String, Integer>(indelString,1));
 
-                    } else if (read.getAlignmentStart() == loc.getStart() + 1) {
+                    }
+                    else if (read.getAlignmentStart() == loc.getStart()+1) {
                         // opposite corner condition: read will start at current locus with an insertion
-                        for (String s : consensusIndelStrings.keySet()) {
-                            int cnt = consensusIndelStrings.get(s);
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
                             if (s.endsWith(indelString)) {
-                                // case 1: current insertion is suffix of indel in hash map
-                                consensusIndelStrings.put(s, cnt + 1);
+                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
                                 foundKey = true;
-                                break;
-                            } else if (indelString.endsWith(s)) {
-                                // case 2: indel stored in hash table is suffix of current insertion
+                            }
+                            else if (indelString.endsWith(s)) {
+                                // case 2: indel stored in hash table is prefix of current insertion
                                 // In this case, new bases are new key.
-
-                                consensusIndelStrings.remove(s);
-                                consensusIndelStrings.put(indelString, cnt + 1);
                                 foundKey = true;
-                                break;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
                             }
                         }
                         if (!foundKey)
                             // none of the above: event bases not supported by previous table, so add new key
-                            consensusIndelStrings.put(indelString, 1);
+                            cList.add(new Pair<String, Integer>(indelString,1));
 
-                    } else {
-                        // normal case: insertion somewhere in the middle of a read: add count to hash map
-                        int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
-                        consensusIndelStrings.put(indelString, cnt + 1);
+
+                    }
+                    else {
+                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
+                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
                     }
 
-                } else if (p.isDeletion()) {
-                    indelString = String.format("D%d", p.getEventLength());
-                    int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
-                    consensusIndelStrings.put(indelString, cnt + 1);
+                    // copy back arrayList into hashMap
+                    consensusIndelStrings.clear();
+                    for (Pair<String,Integer> pair : cList) {
+                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
+                    }
+
+                }
+                else if (p.isDeletion()) {
+                    indelString = String.format("D%d",p.getEventLength());
+                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                    consensusIndelStrings.put(indelString,cnt+1);
 
                 }
             }