Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-08-03 13:14:43 -04:00 · 2012-08-03 13:14:43 -04:00 · c3b6e2b143
parent ff80f17721 8817fc70d1
commit c3b6e2b143
101 changed files with 1603 additions and 787 deletions
--- a/ivy.xml
+++ b/ivy.xml
@ -52,7 +52,7 @@
        <dependency org="gov.nist" name="Jama" rev="1.0.2"/>

        <!-- Dependencies for the graph aligner -->
-        <dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
+        <dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>

        <!-- Dependencies for the html walker documention -->
        <dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 * OTHER DEALINGS IN THE SOFTWARE.
 */

+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

 public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java
@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileHeader;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
@ -68,10 +68,10 @@ public class ErrorModel  {
                    break;
                }
            }
+            haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
            if (refSampleVC.isIndel()) {
                pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
                        UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
-                haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
                indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
                IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
            }
@ -96,7 +96,8 @@ public class ErrorModel  {
                final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
                //perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
                final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
-                perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
+                if (!haplotypeMap.isEmpty())
+                    perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
            }
            int idx = 0;
            for (PileupElement refPileupElement : refSamplePileup) {
@ -108,7 +109,7 @@ public class ErrorModel  {
                    if (DEBUG) System.out.println(m);
                    isMatch |= m;
                }
-                if (refSampleVC.isIndel()) {
+                if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
                    // ignore match/mismatch if reads, as determined by their likelihood, are not informative
                    double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
                    if (!isInformativeElement(perAlleleLikelihoods))
@ -173,10 +174,10 @@ public class ErrorModel  {
        // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
        if (allele.isReference()) {
            // for a ref allele, any base mismatch or new indel is a mismatch.
-            if(allele.getBases().length>0 )
+            if(allele.getBases().length>0)
                // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
                // could clean up/simplify this when unpadding is removed
-                return (pileupElement.getBase() == refBase);
+                return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
            else
                // either null allele to compare, or ref/alt lengths are different (indel by definition).
                // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
 import java.io.PrintStream;
 import java.util.*;

-public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
+public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
    final protected UnifiedArgumentCollection UAC;

@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
    private final static boolean VERBOSE = false;

-    protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
+    protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
        super(UAC, N, logger, verboseWriter);
        ploidy = UAC.samplePloidy;
        this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
        for ( final double[] likelihoods : GLs ) {

            final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
-            final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
+            final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
            // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
            for (int k=1; k < acCount.length;k++) {
                if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
        return newPool;
    }

-    // todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
+    // todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
    /**
     *
     * @param set                       ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
                continue;


-            PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
+            GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
        }


@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {

        // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
        // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
-        PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
+        GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
        final double[] x = originalPool.getLikelihoodsAsVector(true);
        while(firstIterator.hasNext()) {
            x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
            firstIterator.next();
        }

-        PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
+        GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
        final double[] y = yy.clone();
        while(secondIterator.hasNext()) {
            y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {

        // initialize output to -log10(choose(m1+m2,[k1 k2...])
        final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
-        final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
+        final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);


        // Now, result(K) =  logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
            double denom =  -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);

            // for current conformation, get all possible ways to break vector K into two components G1 and G2
-            final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
+            final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
            set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
            while (innerIterator.hasNext()) {
                // check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
            if ( numOriginalAltAlleles == numNewAltAlleles) {
                newLikelihoods = originalLikelihoods;
            } else {
-                newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
+                newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);

                // might need to re-normalize
                newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
        // find the genotype with maximum likelihoods
        final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);

-        final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
+        final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
        final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
        final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();

--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;

 import java.util.*;

-public abstract class PoolGenotypeLikelihoods {
+public abstract class GeneralPloidyGenotypeLikelihoods {
    protected final int numChromosomes;
    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6

@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {

    private static final boolean FAST_GL_COMPUTATION = true;
    // constructor with given logPL elements
-    public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
-                                   final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
+    public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
+                                            final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
        this.alleles = alleles;
        this.nAlleles = alleles.size();
        numChromosomes = ploidy;
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
            Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
        } else {
            if (logLikelihoods.length != likelihoodDim)
-                throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
+                throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");

            log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
        }
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
                final int numAlleles = currentState.length;
                final int ploidy = restrictSumTo;

-                linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
+                linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
            }
            else
                throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
    public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
                                                   final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {

-        int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
+        int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
        double[] newPLs = new double[newPLSize];


@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
                    newCount[idx] =  pVec[permutationKey[idx]];
    
                // get corresponding index from new count
-                int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
+                int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
                newPLs[outputIdx] = pl;
                if (VERBOSE) {
                    System.out.println("Old Key:"+Arrays.toString(pVec));
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;

 import java.util.*;

-public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
+public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {

    //protected Set<String> laneIDs;
    public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi

    final protected UnifiedArgumentCollection UAC;

-    protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
+    protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC,logger);
        this.UAC = UAC;

@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
    protected static class PoolGenotypeData {

        public final String name;
-        public final PoolGenotypeLikelihoods GL;
+        public final GeneralPloidyGenotypeLikelihoods GL;
        public final int depth;
        public final List<Allele> alleles;

-        public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
+        public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
            this.name = name;
            this.GL = GL;
            this.depth = depth;
@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
            ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();

            // create the GenotypeLikelihoods object
-            final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
+            final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
            // actually compute likelihoods
            final int nGoodBases = GL.add(pileup, UAC);
            if ( nGoodBases > 0 )
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi

        for ( PoolGenotypeData sampleData : GLs ) {
            // extract from multidimensional array
-            final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
+            final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
                    allAlleles, alleles);

            // normalize in log space so that max element is zero.
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
       Abstract methods - must be implemented in derived classes
    */

-    protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
+    protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
                                                                               final double[] logLikelihoods,
                                                                               final int ploidy,
                                                                               final HashMap<String, ErrorModel> perLaneErrorModels,
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
@ -18,7 +18,7 @@ import java.util.*;
 * Time: 10:06 AM
 * To change this template use File | Settings | File Templates.
 */
-public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
+public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
    final PairHMMIndelErrorModel pairModel;
    final LinkedHashMap<Allele, Haplotype> haplotypeMap;
    final ReferenceContext refContext;
@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {

    final byte refBase;

-    public PoolIndelGenotypeLikelihoods(final List<Allele> alleles, 
-                                        final double[] logLikelihoods, 
-                                        final int ploidy,
-                                        final HashMap<String, ErrorModel> perLaneErrorModels, 
-                                        final boolean ignoreLaneInformation,
-                                        final PairHMMIndelErrorModel pairModel, 
-                                        final LinkedHashMap<Allele, Haplotype> haplotypeMap,
-                                        final ReferenceContext referenceContext) {
+    public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
+                                                 final double[] logLikelihoods,
+                                                 final int ploidy,
+                                                 final HashMap<String, ErrorModel> perLaneErrorModels,
+                                                 final boolean ignoreLaneInformation,
+                                                 final PairHMMIndelErrorModel pairModel,
+                                                 final LinkedHashMap<Allele, Haplotype> haplotypeMap,
+                                                 final ReferenceContext referenceContext) {
        super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
        this.pairModel = pairModel;
        this.haplotypeMap = haplotypeMap;
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
 import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.variantcontext.*;

 import java.util.*;

-public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
+public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
    private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;

    private PairHMMIndelErrorModel pairModel;
@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
    }
       */

-    protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
+    protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
        super(UAC, logger);


@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
    }


-    protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
+    protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
                                                                               final double[] logLikelihoods,
                                                                               final int ploidy,
                                                                               final HashMap<String, ErrorModel> perLaneErrorModels,
                                                                               final boolean useBQAedPileup,
                                                                               final ReferenceContext ref,
                                                                               final boolean ignoreLaneInformation){
-        return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
+        return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
    }

    protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
@ -96,6 +94,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
            haplotypeMap.clear();
        }
        IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
+
+        // sanity check: if haplotype map couldn't be created, clear allele list
+        if (haplotypeMap.isEmpty())
+            alleles.clear();
        return alleles;

    }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.baq.BAQ;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
 * and posteriors given a pile of bases and quality scores
 *
 */
-public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
+public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {

    final List<Allele> myAlleles;
    final int[] alleleIndices;
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
     * @param useBQAedPileup    Use BAQed pileup
     * @param ignoreLaneInformation  If true, lane info is ignored
     */
-    public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
-                                      final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
+    public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
+                                               final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
        super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
        this.useBAQedPileup = useBQAedPileup;

        myAlleles = new ArrayList<Allele>(alleles);

-        refByte = alleles.get(0).getBases()[0];  // by construction, first allele in list is always ref!
+        Allele refAllele = alleles.get(0);
+        //sanity check: by construction, first allele should ALWAYS be the reference alleles
+        if (!refAllele.isReference())
+            throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
+
+        refByte = refAllele.getBases()[0];  // by construction, first allele in list is always ref!

        if (myAlleles.size() < BaseUtils.BASES.length) {
            // likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;

 import java.util.*;

-public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
+public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {


-    protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC,  Logger logger) {
+    protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC, logger);

    }

-    protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
+    protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
                                                                               final double[] logLikelihoods,
                                                                               final int ploidy,
                                                                               final HashMap<String, ErrorModel> perLaneErrorModels,
                                                                               final boolean useBQAedPileup,
                                                                               final ReferenceContext ref,
                                                                               final boolean ignoreLaneInformation) {
-        return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
+        return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
    }

    protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
        samplesList.addAll( samples );
        // initialize the UnifiedGenotyper Engine which is used to call into the exact model
+        UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
        UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
        UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java
@ -19,7 +19,7 @@ import java.util.Arrays;
 * Time: 7:44 AM
 * To change this template use File | Settings | File Templates.
 */
-public class PoolAFCalculationModelUnitTest extends BaseTest {
+public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {

    static double[] AA1, AB1, BB1;
    static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
    public void testGLs(GetGLsTest cfg) {

        final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
-        final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
+        final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
        double[] priors = new double[len];  // flat priors

-        PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
+        GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
        int nameIndex = 1;
        for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
            int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
@ -41,7 +41,7 @@ import java.io.PrintStream;
 import java.util.*;


-public class PoolGenotypeLikelihoodsUnitTest {
+public class GeneralPloidyGenotypeLikelihoodsUnitTest {

    final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
    final Logger logger = Logger.getLogger(Walker.class);
@ -60,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
    public void testStoringLikelihoodElements() {


-        // basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
+        // basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back

        int ploidy = 20;
        int numAlleles = 4;
@ -78,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
        for (int k=0; k < gls.length; k++)
            gls[k]= (double)k;

-        PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
+        GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
        double[] glnew = gl.getLikelihoods();

        Assert.assertEquals(gls, glnew);
@ -90,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {

        for (int ploidy = 2; ploidy < 10; ploidy++) {
            for (int nAlleles = 2; nAlleles < 10; nAlleles++)
-                Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
+                Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
                        GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
        }

@ -102,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
        // create iterator, compare linear index given by iterator with closed form function
        int numAlleles = 4;
        int ploidy = 2;
-        PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
+        GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);

        while(iterator.hasNext()) {
            System.out.format("\n%d:",iterator.getLinearIndex());
@ -111,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
                System.out.format("%d ",aa);


-            int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
+            int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
            System.out.format("Computed idx = %d\n",computedIdx);
            iterator.next();
        }
@ -140,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
        allelesToSubset.add(Allele.create("A",false));
        allelesToSubset.add(Allele.create("C",false));

-        double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
+        double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
                originalAlleles, allelesToSubset);


@ -170,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
    @Test
    public void testIndexIterator() {
        int[] seed = new int[]{1,2,3,4};
-        PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
+        GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
        // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
        Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);

@ -228,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {

    }

-    private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
-        PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
+    private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
+        GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);

        while(iterator.hasNext()) {
            int[] a =  iterator.getCurrentVector();
-            int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
+            int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
            if (VERBOSE)   {
                System.out.format("%d:",iterator.getLinearIndex());
                for (int i=0; i < seed.length; i++)
@ -289,13 +289,11 @@ public class PoolGenotypeLikelihoodsUnitTest {

    }

-    // TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than
-    // TODO --     HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated.
-    @Test (enabled = false)
+    @Test
    public void testIndelErrorModel() {
        final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
        final byte refByte = refPileupTestProvider.getRefByte();
-        final String altBases = (char)refByte + "TCA";
+        final String altBases = "TCA";
        final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
        final List<Allele> trueAlleles = new ArrayList<Allele>();
        trueAlleles.add(Allele.create(refByte, true));
@ -316,7 +314,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
                // get artificial alignment context for ref sample - no noise
                // CASE 1: Test HET insertion
                // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
-                Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
+                Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
                final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
                final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
                final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
@ -393,6 +391,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
        final byte refByte = readPileupTestProvider.getRefByte();
        final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';

+
        final List<Allele> allAlleles = new ArrayList<Allele>();  // this contains only ref Allele up to now
        final Set<String> laneIDs = new TreeSet<String>();
        laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE);
@ -409,23 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
        for (String laneID : laneIDs)
            noisyErrorModels.put(laneID, Q30ErrorModel);

-        final int refIdx = 0;
-        int altIdx = 2;
-
-        // ref allele must be first
-        allAlleles.add(Allele.create(refByte, true));
+         // all first ref allele
+        allAlleles.add(Allele.create(refByte,true));
        for (byte b: BaseUtils.BASES) {
-            if (refByte != b) {
-                if (b == altByte)
-                    altIdx = allAlleles.size();
+            if (refByte != b)
                allAlleles.add(Allele.create(b, false));
-            }
        }

+        final int refIdx = 0;
+        int altIdx = -1;
+
+        for (int k=0; k < allAlleles.size(); k++)
+            if (altByte == allAlleles.get(k).getBases()[0]) {
+                altIdx = k;
+                break;
+            }
+
+
+
        PrintStream out = null;
        if (SIMULATE_NOISY_PILEUP) {
            try {
-                out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
+                out = new PrintStream(new File("GLUnitTest.table"));
    //                            out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
            }
            catch (Exception e) {}
@ -449,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {

                    // get now likelihoods for this

-                    final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
+                    final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
                    final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
                    if (VERBOSE) {
                        System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -478,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {

                            // get now likelihoods for this

-                            final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
+                            final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
                            noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
                            mlPair = noisyGL.getMostLikelyACCount();

--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
@ -12,33 +12,33 @@ import org.testng.annotations.Test;
 * Time: 11:28 AM
 * To change this template use File | Settings | File Templates.
 */
-public class PoolCallerIntegrationTest extends WalkerTest {
+public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
    final static String REF = b37KGReference;
    final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
    final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
    final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
    final String REFSAMPLE_NAME = "NA12878";
-    final String MTINTERVALS = "MT";
+    final String MTINTERVALS = "MT:1-3000";
    final String LSVINTERVALS = "20:40,000,000-41,000,000";
    final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
    final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
    final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
    private void PC_MT_Test(String bam, String args, String name, String md5) {
-        final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
+        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
                REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
    }

    private void PC_LSV_Test(String args, String name, String model, String md5) {
-        final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
+        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
                REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
    }

    private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
-        final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
+        final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
                REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
        executeTest("testPoolCaller:"+name+" args=" + args, spec);
@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest {

    @Test
    public void testBOTH_GGA_Pools() {
-        PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
+        PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
    }

    @Test
    public void testINDEL_GGA_Pools() {
-        PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
+        PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
    }

    @Test
    public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
-        PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
+        PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
    }

    @Test
    public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
-        PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
+        PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
    }

    @Test
    public void testMT_SNP_DISCOVERY_sp4() {
-         PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
+         PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7");
    }

    @Test
    public void testMT_SNP_GGA_sp10() {

-        PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
+        PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a");
    }

 }
--- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
+++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
@ -1,4 +1,5 @@
 library("ggplot2")
+library("tools") #For compactPDF in R 2.13+

 args <- commandArgs(TRUE)
 data <- read.csv(args[1])
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@ -130,12 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {

        // can't close tribble index when writing
        if ( message.indexOf("Unable to close index for") != -1 )
-            exitSystemWithUserError(new UserException(t.getCause().getMessage()));
+            exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));

        // disk is full
        if ( message.indexOf("No space left on device") != -1 )
            exitSystemWithUserError(new UserException(t.getMessage()));
-        if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
+        if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
            exitSystemWithUserError(new UserException(t.getCause().getMessage()));
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@ -274,6 +274,38 @@ public class GenomeAnalysisEngine {
        //return result;
    }

+    // TODO -- Let's move this to a utility class in unstable - but which one?
+    // **************************************************************************************
+    // *                            Handle Deprecated Walkers                               *
+    // **************************************************************************************
+
+    // Mapping from walker name to major version number where the walker first disappeared
+    private static Map<String, String> deprecatedGATKWalkers = new HashMap<String, String>();
+    static {
+        deprecatedGATKWalkers.put("CountCovariates", "2.0");
+        deprecatedGATKWalkers.put("TableRecalibration", "2.0");
+    }
+
+    /**
+     * Utility method to check whether a given walker has been deprecated in a previous GATK release
+     *
+     * @param walkerName   the walker class name (not the full package) to check
+     */
+    public static boolean isDeprecatedWalker(final String walkerName) {
+        return deprecatedGATKWalkers.containsKey(walkerName);
+    }
+
+    /**
+     * Utility method to check whether a given walker has been deprecated in a previous GATK release
+     *
+     * @param walkerName   the walker class name (not the full package) to check
+     */
+    public static String getDeprecatedMajorVersionNumber(final String walkerName) {
+        return deprecatedGATKWalkers.get(walkerName);
+    }
+
+    // **************************************************************************************
+
    /**
     * Retrieves an instance of the walker based on the walker name.
     *
@ -287,6 +319,9 @@ public class GenomeAnalysisEngine {
            if ( isGATKLite() && GATKLiteUtils.isAvailableOnlyInFullGATK(walkerName) ) {
                e = new UserException.NotSupportedInGATKLite("the " + walkerName + " walker is available only in the full version of the GATK");
            }
+            else if ( isDeprecatedWalker(walkerName) ) {
+                e = new UserException.DeprecatedWalker(walkerName, getDeprecatedMajorVersionNumber(walkerName));
+            }
            throw e;
        }
    }
@ -762,6 +797,14 @@ public class GenomeAnalysisEngine {
        if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
            throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");

+        if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
+            throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
+
+        boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
+
+        if (argCollection.keepProgramRecords)
+            removeProgramRecords = false;
+
        return new SAMDataSource(
                samReaderIDs,
                threadAllocation,
@ -778,7 +821,8 @@ public class GenomeAnalysisEngine {
                getWalkerBAQQualityMode(),
                refReader,
                getBaseRecalibration(),
-                argCollection.defaultBaseQualities);
+                argCollection.defaultBaseQualities,
+                removeProgramRecords);
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@ -249,6 +249,12 @@ public class GATKArgumentCollection {
    @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
    public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;

+    @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
+    public boolean removeProgramRecords = false;
+
+    @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
+    public boolean keepProgramRecords = false;
+
    @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.", required = false)
    public ValidationExclusion.TYPE unsafe;

--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@ -89,6 +89,11 @@ public class SAMDataSource {
     */
    private final SAMFileReader.ValidationStringency validationStringency;

+    /**
+     * Do we want to remove the program records from this data source?
+     */
+    private final boolean removeProgramRecords;
+
    /**
     * Store BAM indices for each reader present.
     */
@ -200,7 +205,8 @@ public class SAMDataSource {
                BAQ.QualityMode.DONT_MODIFY,
                null, // no BAQ
                null, // no BQSR
-                (byte) -1);
+                (byte) -1,
+                false);
    }

    /**
@ -233,7 +239,8 @@ public class SAMDataSource {
            BAQ.QualityMode qmode,
            IndexedFastaSequenceFile refReader,
            BaseRecalibration bqsrApplier,
-            byte defaultBaseQualities) {
+            byte defaultBaseQualities,
+            boolean removeProgramRecords) {
        this.readMetrics = new ReadMetrics();
        this.genomeLocParser = genomeLocParser;

@ -249,6 +256,7 @@ public class SAMDataSource {
            dispatcher = null;

        validationStringency = strictness;
+        this.removeProgramRecords = removeProgramRecords;
        if(readBufferSize != null)
            ReadShard.setReadBufferSize(readBufferSize);
        else {
@ -748,7 +756,7 @@ public class SAMDataSource {
        private synchronized void createNewResource() {
            if(allResources.size() > maxEntries)
                throw new ReviewedStingException("Cannot create a new resource pool.  All resources are in use.");
-            SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
+            SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
            allResources.add(readers);
            availableResources.add(readers);
        }
@ -777,9 +785,11 @@ public class SAMDataSource {
        /**
         * Derive a new set of readers from the Reads metadata.
         * @param readerIDs reads to load.
+         * TODO: validationStringency is not used here
         * @param validationStringency validation stringency.
+         * @param removeProgramRecords indicate whether to clear program records from the readers
         */
-        public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
+        public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
            final int totalNumberOfFiles = readerIDs.size();
            int readerNumber = 1;
            final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
            long lastTick = timer.currentTime();
            for(final SAMReaderID readerID: readerIDs) {
                final ReaderInitializer init = new ReaderInitializer(readerID).call();
+                if (removeProgramRecords) {
+                    init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
+                }
                if (threadAllocation.getNumIOThreads() > 0) {
                    inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
                }
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
        if (stub.getGenerateMD5())
            factory.setCreateMd5File(true);
        // Adjust max records in RAM.
+        // TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
        if(stub.getMaxRecordsInRam() != null)
            factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@ -86,7 +86,7 @@ public class GATKRunReport {
    private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");

    // number of milliseconds before the S3 put operation is timed-out:
-    private static final long S3PutTimeOut = 30 * 1000;
+    private static final long S3PutTimeOut = 10 * 1000;


    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@ -89,9 +89,9 @@ public class GATKReport {
            reader = new BufferedReader(new FileReader(file));
            reportHeader = reader.readLine();
        } catch (FileNotFoundException e) {
-            throw new ReviewedStingException("Could not open file : " + file);
+            throw new UserException.CouldNotReadInputFile(file, "it does not exist");
        } catch (IOException e) { 
-            throw new ReviewedStingException("Could not read file : " + file);                            
+            throw new UserException.CouldNotReadInputFile(file, e);
        }   


--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@ -32,6 +32,7 @@ import java.util.List;
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
+@RemoveProgramRecords
 public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {

    @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
+@RemoveProgramRecords
 public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
    // Do we actually want to operate on the context?
    public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java
@ -0,0 +1,21 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 8/2/12
+ * Time: 1:58 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+import java.lang.annotation.*;
+
+/**
+ * Indicates that program records should be removed from SAM headers by default for this walker
+ */
+@Documented
+@Inherited
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface RemoveProgramRecords {
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java
@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 import org.broadinstitute.sting.commandline.Gatherer;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;

 import java.io.File;
 import java.io.FileNotFoundException;
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer  {
        if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
            final File recal_out = new File(output.getName() + ".original");
            final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
-            RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
        }
        else if (!RAC.NO_PLOTS) {
            final File recal_out = new File(output.getName() + ".recal");
-            RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
        }

        generalReport.output(outputFile);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
 import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
+import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -58,7 +62,7 @@ import java.util.ArrayList;
 * This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
 * only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
 * of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
- * reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
+ * reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
 * probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
 * The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
 * <p>
@ -90,7 +94,7 @@ import java.util.ArrayList;
 * <h2>Examples</h2>
 * <pre>
 * java -Xmx4g -jar GenomeAnalysisTK.jar \
- *   -T BaseQualityScoreRecalibrator \
+ *   -T BaseRecalibrator \
 *   -I my_reads.bam \
 *   -R resources/Homo_sapiens_assembly18.fasta \
 *   -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
    @ArgumentCollection
    private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();                          // all the command line arguments for BQSR and it's covariates

-    private QuantizationInfo quantizationInfo;                                                                          // an object that keeps track of the information necessary for quality score quantization 
+    private QuantizationInfo quantizationInfo;                                                                          // an object that keeps track of the information necessary for quality score quantization
    
    private RecalibrationTables recalibrationTables;

@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
            throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);

        if (RAC.LIST_ONLY) {
-            RecalDataManager.listAvailableCovariates(logger);
+            RecalUtils.listAvailableCovariates(logger);
            System.exit(0);
        }
        RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE;                                          // if we have a recalibration file, record it so it goes on the report table

-        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC);       // initialize the required and optional covariates
+        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC);       // initialize the required and optional covariates
        ArrayList<Covariate> requiredCovariates = covariates.getFirst();
        ArrayList<Covariate> optionalCovariates = covariates.getSecond();

@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed

                if (readNotSeen(read)) {
                    read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
-                    RecalDataManager.parsePlatformForRead(read, RAC);
-                    if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
+                    RecalUtils.parsePlatformForRead(read, RAC);
+                    if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
                        read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
                        continue;
                    }
-                    read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
+                    read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
                }

                if (!ReadUtils.isSOLiDRead(read) ||                                                                     // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
-                    RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
-                        RecalDataManager.isColorSpaceConsistent(read, offset))
+                    RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
+                        RecalUtils.isColorSpaceConsistent(read, offset))
                    recalibrationEngine.updateDataForPileupElement(p, ref.getBase());                                                             // This base finally passed all the checks for a good base, so add it to the big data hashmap
            }
            countedSites++;
@ -271,13 +275,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
    public void onTraversalDone(Long result) {
        logger.info("Calculating quantized quality scores...");
        quantizeQualityScores();
+
+        logger.info("Writing recalibration report...");
+        generateReport();
+        logger.info("...done!");
+
        if (!RAC.NO_PLOTS) {
            logger.info("Generating recalibration plots...");
            generatePlots();
        }
-        logger.info("Writing recalibration report...");
-        generateReport();
-        logger.info("...done!");
+
        logger.info("Processed: " + result + " sites");
    }

@ -285,10 +292,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
        File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
        if (recalFile != null) {
            RecalibrationReport report = new RecalibrationReport(recalFile);
-            RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
        }
        else
-            RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
+            RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
    }


@ -309,7 +316,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
            throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
        }

-        RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
+        RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
    }
 }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java
@ -1,109 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-import org.broadinstitute.sting.utils.QualityUtils;
-
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Jan 6, 2010
- *
- * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
- * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class Datum {
-
-    long numObservations;                                                                                     // number of bases seen in total
-    long numMismatches;                                                                                       // number of bases seen that didn't match the reference
-
-    private static final int SMOOTHING_CONSTANT = 1;                                                                    // used when calculating empirical qualities to avoid division by zero
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // constructors
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    public Datum() {
-        numObservations = 0L;
-        numMismatches = 0L;
-    }
-
-    public Datum(long numObservations, long numMismatches) {
-        this.numObservations = numObservations;
-        this.numMismatches = numMismatches;
-    }
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // increment methods
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    synchronized void increment(final long incObservations, final long incMismatches) {
-        numObservations += incObservations;
-        numMismatches += incMismatches;
-    }
-
-    synchronized void increment(final boolean isError) {
-        numObservations++;
-        numMismatches += isError ? 1:0;
-    }
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // methods to derive empirical quality score
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    double empiricalQualDouble() {
-        final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
-        final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
-        final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
-        return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
-    }
-
-    byte empiricalQualByte() {
-        final double doubleMismatches = (double) (numMismatches);
-        final double doubleObservations = (double) (numObservations);
-        return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations);                                    // This is capped at Q40
-    }
-
-    @Override
-    public String toString() {
-        return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (!(o instanceof Datum))
-            return false;
-        Datum other = (Datum) o;
-        return numMismatches == other.numMismatches && numObservations == other.numObservations;
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java
@ -1,148 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
-
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-import com.google.java.contract.Ensures;
-import com.google.java.contract.Requires;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
-
-import java.util.Random;
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Nov 3, 2009
- *
- * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
- */
-
-public class RecalDatum extends Datum {
-
-    private static final double UNINITIALIZED = -1.0;
-
-    private double estimatedQReported;                                                                                  // estimated reported quality score based on combined data's individual q-reporteds and number of observations
-    private double empiricalQuality;                                                                                    // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
-
-
-    //---------------------------------------------------------------------------------------------------------------
-    //
-    // constructors
-    //
-    //---------------------------------------------------------------------------------------------------------------
-
-    public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
-        numObservations = _numObservations;
-        numMismatches = _numMismatches;
-        estimatedQReported = reportedQuality;
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    public RecalDatum(final RecalDatum copy) {
-        this.numObservations = copy.numObservations;
-        this.numMismatches = copy.numMismatches;
-        this.estimatedQReported = copy.estimatedQReported;
-        this.empiricalQuality = copy.empiricalQuality;
-    }
-
-    public void combine(final RecalDatum other) {
-        final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
-        increment(other.numObservations, other.numMismatches);
-        estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    @Override
-    public void increment(final boolean isError) {
-        super.increment(isError);
-        empiricalQuality = UNINITIALIZED;
-    }
-
-    @Requires("empiricalQuality == UNINITIALIZED")
-    @Ensures("empiricalQuality != UNINITIALIZED")
-    protected final void calcEmpiricalQuality() {
-        empiricalQuality = empiricalQualDouble();                                                                       // cache the value so we don't call log over and over again
-    }
-
-    public void setEstimatedQReported(final double estimatedQReported) {
-        this.estimatedQReported = estimatedQReported;
-    }
-
-    public final double getEstimatedQReported() {
-        return estimatedQReported;
-    }
-
-    public void setEmpiricalQuality(final double empiricalQuality) {
-        this.empiricalQuality = empiricalQuality;
-    }
-
-    public final double getEmpiricalQuality() {
-        if (empiricalQuality == UNINITIALIZED)
-            calcEmpiricalQuality();
-        return empiricalQuality;
-    }
-
-    @Override
-    public String toString() {
-        return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
-    }
-
-    public String stringForCSV() {
-        return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
-    }
-
-    private double calcExpectedErrors() {
-        return (double) this.numObservations * qualToErrorProb(estimatedQReported);
-    }
-
-    private double qualToErrorProb(final double qual) {
-        return Math.pow(10.0, qual / -10.0);
-    }
-
-    public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
-        final Random random = new Random();
-        final int nObservations = random.nextInt(maxObservations);
-        final int nErrors = random.nextInt(maxErrors);
-        final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
-        return new RecalDatum(nObservations, nErrors, (byte)qual);
-    }
-
-    /**
-     * We don't compare the estimated quality reported because it may be different when read from
-     * report tables.
-     *
-     * @param o the other recal datum
-     * @return true if the two recal datums have the same number of observations, errors and empirical quality.
-     */
-    @Override
-    public boolean equals(Object o) {
-        if (!(o instanceof RecalDatum))
-            return false;
-        RecalDatum other = (RecalDatum) o;
-        return super.equals(o) &&
-               MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
 import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;

 import java.io.File;
 import java.util.Collections;
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
     * reads which have had the reference inserted because of color space inconsistencies.
     */
    @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
-    public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
+    public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;

    /**
     * CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
     * their color space tag can not be recalibrated.
     */
    @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
-    public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
+    public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;

    /**
     * The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection {
    public GATKReportTable generateReportTable() {
        GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
        argumentsTable.addColumn("Argument");
-        argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+        argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
        argumentsTable.addRowID("covariate", true);
-        argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
+        argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
        argumentsTable.addRowID("no_standard_covs", true);
-        argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
+        argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
        argumentsTable.addRowID("run_without_dbsnp", true);
-        argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
+        argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
        argumentsTable.addRowID("solid_recal_mode", true);
-        argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
+        argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
        argumentsTable.addRowID("solid_nocall_strategy", true);
-        argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
+        argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
        argumentsTable.addRowID("mismatches_context_size", true);
-        argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
+        argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
        argumentsTable.addRowID("indels_context_size", true);
-        argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
+        argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
        argumentsTable.addRowID("mismatches_default_quality", true);
-        argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
+        argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
        argumentsTable.addRowID("insertions_default_quality", true);
-        argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
+        argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
        argumentsTable.addRowID("low_quality_tail", true);
-        argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
+        argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
        argumentsTable.addRowID("default_platform", true);
-        argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
+        argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
        argumentsTable.addRowID("force_platform", true);
-        argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
+        argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
        argumentsTable.addRowID("quantizing_levels", true);
-        argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
+        argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
        argumentsTable.addRowID("keep_intermediate_files", true);
-        argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
+        argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
        argumentsTable.addRowID("no_plots", true);
-        argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
+        argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
        argumentsTable.addRowID("recalibration_report", true);
-        argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
+        argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
        argumentsTable.addRowID("binary_tag_name", true);
-        argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
+        argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
        return argumentsTable;
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;

+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
 * OTHER DEALINGS IN THE SOFTWARE.
 */

+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java
@ -47,7 +47,10 @@ import java.util.List;
 * <p>
 * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
 * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
- * Note that if there are multiple variants at a site, it takes the first one seen.
+ * Several important notes:
+ * 1) if there are multiple variants that start at a site, it chooses one of them randomly.
+ * 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
+ * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
 * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
 *
 * <h2>Input</h2>
@ -102,7 +105,7 @@ public class FastaAlternateReference extends FastaReference {
        String refBase = String.valueOf((char)ref.getBase());

        // Check to see if we have a called snp
-        for ( VariantContext vc : tracker.getValues(variants) ) {
+        for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
            if ( vc.isFiltered() )
                continue;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {

    public enum Model {
        /** The default model with the best performance in all cases */
-        EXACT,
-        POOL
+        EXACT
    }

    protected int N;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
    public enum Model {
        SNP,
        INDEL,
-        BOTH,
-        POOLSNP,
-        POOLINDEL,
-        POOLBOTH
+        GeneralPloidySNP,
+        GeneralPloidyINDEL,
+        BOTH
    }

    public enum GENOTYPING_MODE {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -82,7 +82,7 @@ import java.util.*;
 *   -o snps.raw.vcf \
 *   -stand_call_conf [50.0] \
 *   -stand_emit_conf 10.0 \
- *   -dcov [50] \
+ *   -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
 *   [-L targets.interval_list]
 * </pre>
 *
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
        } else {
            // in full mode: check for consistency in ploidy/pool calling arguments
            // check for correct calculation models
-            if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
+/*            if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
                // polyploidy requires POOL GL and AF calculation models to be specified right now
                if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
                        && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH)   {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
                    throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");

            }
+  */
            // get all of the unique sample names
            if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
                samples.clear();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -50,6 +50,7 @@ import java.util.*;

 public class UnifiedGenotyperEngine {
    public static final String LOW_QUAL_FILTER_NAME = "LowQual";
+    private static final String GPSTRING = "GeneralPloidy";

    public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";

@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine {
            glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
        }

-        return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
+        return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
    }

    private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine {
        if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
            modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");

+        if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+            modelPrefix = GPSTRING + modelPrefix;
+
        // if we're genotyping given alleles and we have a requested SNP at this position, do SNP
        if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
            final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine {

            if ( vcInput.isSNP() )  {
                // ignore SNPs if the user chose INDEL mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
                    models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
-                else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
-                    models.add(UAC.GLmodel);
-            }
+             }
            else if ( vcInput.isIndel() || vcInput.isMixed() ) {
                // ignore INDELs if the user chose SNP mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
+                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
                    models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
-                else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
-                    models.add(UAC.GLmodel);
            }
            // No support for other types yet
        }
@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine {
                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
            }
            else {
-                models.add(UAC.GLmodel);
+                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
            }
        }

@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine {
    }

    private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
+
        List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();

+        // user-specified name
+        String afModelName = UAC.AFmodel.name();
+
+        if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
+            afModelName = GPSTRING + afModelName;
+
        for (int i = 0; i < afClasses.size(); i++) {
            Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
            String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
-            if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
+            if (afModelName.equalsIgnoreCase(key)) {
                try {
                    Object args[] = new Object[]{UAC,N,logger,verboseWriter};
                    Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java
@ -73,19 +73,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
    @Output(required=false, doc="Output bam")
    protected StingSAMFileWriter writer = null;

-    /**
-     * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
-     * may run out of memory.  We recommend that you additionally tell Java to use a temp directory with plenty of available
-     * space (by setting java.io.tempdir on the command-line).
-     */
-    @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
-    protected int MAX_RECORDS_IN_RAM = 500000;
-
-    public void initialize() {
-        // set up the output writer
-        if ( writer != null )
-            writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
-    }
+    public void initialize() {}

    private void emit(final SAMRecord read) {
        if ( writer != null )
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
    private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
 //        for ( String sample : samplesToPhase )
 //            logger.debug(String.format("  Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
-        VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
+        VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
 //        logger.debug("original VC = " + vc);
 //        logger.debug("sub      VC = " + subvc);
        return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
            return true;
        // want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
        // first subset to the samples
-        VariantContext subContext = vc.subContextFromSamples(samples, true);
+        VariantContext subContext = vc.subContextFromSamples(samples);

        // now check to see (using EXACT model) whether this should be variant
        // do we want to apply a prior? maybe user-spec?
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
        if ( samples == null || samples.isEmpty() )
            return true;

-        VariantContext subContext =  vc.subContextFromSamples(samples, false);
+        VariantContext subContext =  vc.subContextFromSamples(samples);
        if ( subContext.isPolymorphicInSamples() ) {
            return true;
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu

    @Requires({"eval != null", "comp != null"})
    private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
-        // find all of the matching comps
+        if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
+            // if either of these are NO_VARIATION they are LENIENT matches
+            return EvalCompMatchType.LENIENT;
+
        if ( comp.getType() != eval.getType() )
            return EvalCompMatchType.NO_MATCH;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
        }
    }

-    public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if (vc1 != null) updateTiTv(vc1, false);
-        if (vc2 != null) updateTiTv(vc2, true);
+    @Override
+    public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if (eval != null)
+            updateTiTv(eval, false);
+        if (comp != null)
+            updateTiTv(comp, true);
    }

    @Override
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java
@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
            final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
            for ( final VariantContext c : knownComps ) {
                // loop over sites, looking for something that matches the type eval
-                if ( eval.getType() == c.getType() ) {
+                if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
                    return KNOWN_STATES;
                }
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@ -197,7 +197,9 @@ public class VariantEvalUtils {
     * @return a new VariantContext with just the requested samples
     */
    public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
-        return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
+        // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
+        final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
+        return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
    }

    public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@ -262,12 +264,8 @@ public class VariantEvalUtils {
                // First, filter the VariantContext to represent only the samples for evaluation
                VariantContext vcsub = vc;

-                if (subsetBySample && vc.hasGenotypes()) {
-                    if ( variantEvalWalker.isSubsettingToSpecificSamples() )
-                        vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
-                    else
-                        vcsub = ensureAnnotations(vc, vc);
-                }
+                if (subsetBySample && vc.hasGenotypes())
+                    vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());

                if ((byFilter || !vcsub.isFiltered())) {
                    addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@ -58,14 +58,11 @@ import java.util.*;
 * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
 * slightly lower quality level.
 *
- * <p>
- * See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
- *
 * <h2>Input</h2>
 * <p>
 * The input raw variants to be recalibrated.
 * <p>
- * The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
+ * The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
 * <p>
 * The tranches file that was generated by the VariantRecalibrator walker.
 *
@ -82,6 +79,7 @@ import java.util.*;
 *   --ts_filter_level 99.0 \
 *   -tranchesFile path/to/output.tranches \
 *   -recalFile path/to/output.recal \
+ *   -mode SNP \
 *   -o path/to/output.recalibrated.filtered.vcf
 * </pre>
 *
@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T

        for( final VariantContext vc : VCs ) {

-            if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
+            if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {

                final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
                if( recalDatum == null ) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@ -273,11 +274,37 @@ public class VariantDataManager {
    }

    private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
-        return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
-                        ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
+        return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
                        (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
    }

+    protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
+        switch( trainVC.getType() ) {
+            case SNP:
+            case MNP:
+                return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
+            case INDEL:
+            case MIXED:
+            case SYMBOLIC:
+                return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
+            default:
+                return false;
+        }
+    }
+
+    protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
+        switch( mode ) {
+            case SNP:
+                return evalVC.isSNP() || evalVC.isMNP();
+            case INDEL:
+                return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
+            case BOTH:
+                return true;
+            default:
+                throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
+        }
+    }
+
    public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
        // we need to sort in coordinate order in order to produce a valid VCF
        Collections.sort( data, new Comparator<VariantDatum>() {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.R.RScriptExecutor;
 import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;

 import java.io.File;
 import java.io.FileNotFoundException;
@ -65,7 +63,7 @@ import java.util.*;
 * The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
 * One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
 * The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
- * between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
+ * between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic
 * variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
 * as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
 * error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
@ -73,15 +71,9 @@ import java.util.*;
 * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
 *
 * <p>
- * NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
- *
- * <p>
 * NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
 * See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
 *
- * <p>
- * See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
- *
 * <h2>Input</h2>
 * <p>
 * The input raw variants to be recalibrated.
@ -90,7 +82,7 @@ import java.util.*;
 *
 * <h2>Output</h2>
 * <p>
- * A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
+ * A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
 * <p>
 * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
 *
@ -102,8 +94,9 @@ import java.util.*;
 *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
 *   -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
 *   -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
- *   -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
- *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
+ *   -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
+ *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
+ *   -mode SNP \
 *   -recalFile path/to/output.recal \
 *   -tranchesFile path/to/output.tranches \
 *   -rscriptFile path/to/output.plots.R
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
    @Advanced
    @Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
    protected Boolean TRUST_ALL_POLYMORPHIC = false;
-    //@Hidden
-    //@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
-    //protected Boolean PERFORM_PROJECT_CONSENSUS = false;

    /////////////////////////////
    // Private Member Variables
@ -255,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat

        for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
            if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
-                if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
+                if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
                    final VariantDatum datum = new VariantDatum();

                    // Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
                    // Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
                    dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
                    double priorFactor = QualityUtils.qualToProb( datum.prior );
-                    //if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
-                    //    final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
-                    //    priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
-                    //}
                    datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );

                    mapList.add( datum );
@ -282,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
        return mapList;
    }

-    public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
-        return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
-                (mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
-	            (mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
-    }
-
    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@ -69,9 +69,14 @@ public class QualityUtils {
     * @return a probability (0.0 - 1.0)
     */
    static private double qualToErrorProbRaw(int qual) {
+        return qualToErrorProb((double) qual);
+    }
+
+    public static double qualToErrorProb(final double qual) {
        return Math.pow(10.0, ((double) qual)/-10.0);
    }

+
    static public double qualToErrorProb(byte qual) {
        return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
    }
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@ -168,6 +168,28 @@ public class PluginManager<PluginType> {
            String pluginName = getName(pluginClass);
            pluginsByName.put(pluginName, pluginClass);
        }
+
+        // sort the plugins so the order of elements is deterministic
+        sortPlugins(plugins);
+        sortPlugins(interfaces);
+    }
+
+    /**
+     * Sorts, in place, the list of plugins according to getName() on each element
+     *
+     * @param unsortedPlugins
+     */
+    private final void sortPlugins(final List<Class<? extends PluginType>> unsortedPlugins) {
+        Collections.sort(unsortedPlugins, new ComparePluginsByName());
+    }
+
+    private final class ComparePluginsByName implements Comparator<Class<? extends PluginType>> {
+        @Override
+        public int compare(final Class<? extends PluginType> aClass, final Class<? extends PluginType> aClass1) {
+            String pluginName1 = getName(aClass);
+            String pluginName2 = getName(aClass1);
+            return pluginName1.compareTo(pluginName2);
+        }
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java
@ -4,7 +4,7 @@ import com.google.java.contract.Requires;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
 import com.google.java.contract.Requires;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
    final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
    private final static boolean FORBID_SYMBOLICS = false;

+    private final static int ALLOWED_MAJOR_VERSION = 2;
+    private final static int MIN_MINOR_VERSION = 1;
+
+    private BCFVersion bcfVersion = null;
+
    private VCFHeader header = null;

    /**
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
    public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
        try {
            // note that this reads the magic as well, and so does double duty
-            if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
-                error("Input stream does not begin with BCF2 magic");
+            bcfVersion = BCFVersion.readBCFVersion(inputStream);
+            if ( bcfVersion == null )
+                error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
+
+            if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
+                error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
+            if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
+                error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
+
+            logger.info("BCF version " + bcfVersion);

            final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);

@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(path);
-            return BCF2Utils.startsWithBCF2Magic(fis);
+            final BCFVersion version = BCFVersion.readBCFVersion(fis);
+            return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
        } catch ( FileNotFoundException e ) {
            return false;
        } catch ( IOException e ) {
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
            try {
                if ( fis != null ) fis.close();
            } catch ( IOException e ) {
-                ; // do nothing
+                // do nothing
            }
        }
    }
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
@ -202,7 +202,7 @@ public final class BCF2Decoder {
                return null;
            else {
                final String s = new String(bytes, 0, goodLength);
-                return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
+                return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
            }
        } catch ( IOException e ) {
            throw new ReviewedStingException("readByte failure", e);
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java
@ -41,8 +41,6 @@ import java.util.*;
 * @since 5/12
 */
 public final class BCF2Utils {
-    public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
-
    public static final int MAX_ALLELES_IN_GENOTYPES = 127;

    public static final int OVERFLOW_ELEMENT_MARKER = 15;
@ -75,68 +73,60 @@ public final class BCF2Utils {
     */
    @Requires("header != null")
    @Ensures({"result != null", "new HashSet(result).size() == result.size()"})
-    public final static ArrayList<String> makeDictionary(final VCFHeader header) {
+    public static ArrayList<String> makeDictionary(final VCFHeader header) {
        final Set<String> seen = new HashSet<String>();
        final ArrayList<String> dict = new ArrayList<String>();

-        boolean sawPASS = false;
+        // special case the special PASS field which doesn't show up in the FILTER field definitions
+        seen.add(VCFConstants.PASSES_FILTERS_v4);
+        dict.add(VCFConstants.PASSES_FILTERS_v4);
+
        // set up the strings dictionary
        for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
            if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
                final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
                if ( ! seen.contains(idLine.getID())) {
-                    sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
                    dict.add(idLine.getID());
                    seen.add(idLine.getID());
                }
            }
        }

-
-        if ( ! sawPASS )
-            dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
-
        return dict;
    }

    @Requires({"nElements >= 0", "type != null"})
-    public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
+    public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
        int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
        byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
        return typeByte;
    }

    @Ensures("result >= 0")
-    public final static int decodeSize(final byte typeDescriptor) {
+    public static int decodeSize(final byte typeDescriptor) {
        return (0xF0 & typeDescriptor) >> 4;
    }

    @Ensures("result >= 0")
-    public final static int decodeTypeID(final byte typeDescriptor) {
+    public static int decodeTypeID(final byte typeDescriptor) {
        return typeDescriptor & 0x0F;
    }

    @Ensures("result != null")
-    public final static BCF2Type decodeType(final byte typeDescriptor) {
+    public static BCF2Type decodeType(final byte typeDescriptor) {
        return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
    }

-    public final static boolean sizeIsOverflow(final byte typeDescriptor) {
+    public static boolean sizeIsOverflow(final byte typeDescriptor) {
        return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
    }

    @Requires("nElements >= 0")
-    public final static boolean willOverflow(final long nElements) {
+    public static boolean willOverflow(final long nElements) {
        return nElements > MAX_INLINE_ELEMENTS;
    }

-    public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
-        final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
-        stream.read(magicBytes);
-        return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
-    }
-
-    public final static byte readByte(final InputStream stream) {
+    public static byte readByte(final InputStream stream) {
        // TODO -- shouldn't be capturing error here
        try {
            return (byte)(stream.read() & 0xFF);
@ -155,7 +145,7 @@ public final class BCF2Utils {
     */
    @Requires({"strings != null", "strings.size() > 1"})
    @Ensures("result != null")
-    public static final String collapseStringList(final List<String> strings) {
+    public static String collapseStringList(final List<String> strings) {
        final StringBuilder b = new StringBuilder();
        for ( final String s : strings ) {
            if ( s != null ) {
@ -177,14 +167,14 @@ public final class BCF2Utils {
     */
    @Requires({"collapsed != null", "isCollapsedString(collapsed)"})
    @Ensures("result != null")
-    public static final List<String> exploreStringList(final String collapsed) {
+    public static List<String> explodeStringList(final String collapsed) {
        assert isCollapsedString(collapsed);
        final String[] exploded = collapsed.substring(1).split(",");
        return Arrays.asList(exploded);
    }

    @Requires("s != null")
-    public static final boolean isCollapsedString(final String s) {
+    public static boolean isCollapsedString(final String s) {
        return s.charAt(0) == ',';
    }

@ -226,7 +216,7 @@ public final class BCF2Utils {
    }

    @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final int value) {
+    public static BCF2Type determineIntegerType(final int value) {
        for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
            if ( potentialType.withinRange(value) )
                return potentialType;
@ -236,7 +226,7 @@ public final class BCF2Utils {
    }

    @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final int[] values) {
+    public static BCF2Type determineIntegerType(final int[] values) {
        // literally a copy of the code below, but there's no general way to unify lists and arrays in java
        BCF2Type maxType = BCF2Type.INT8;
        for ( final int value : values ) {
@ -262,7 +252,7 @@ public final class BCF2Utils {
     */
    @Requires({"t1.isIntegerType()","t2.isIntegerType()"})
    @Ensures("result.isIntegerType()")
-    public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
+    public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
        switch ( t1 ) {
            case INT8: return t2;
            case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
@ -272,7 +262,7 @@ public final class BCF2Utils {
    }

    @Ensures("result.isIntegerType()")
-    public final static BCF2Type determineIntegerType(final List<Integer> values) {
+    public static BCF2Type determineIntegerType(final List<Integer> values) {
        BCF2Type maxType = BCF2Type.INT8;
        for ( final int value : values ) {
            final BCF2Type type1 = determineIntegerType(value);
@ -297,7 +287,7 @@ public final class BCF2Utils {
     * @param o
     * @return
     */
-    public final static List<Object> toList(final Object o) {
+    public static List<Object> toList(final Object o) {
        if ( o == null ) return Collections.emptyList();
        else if ( o instanceof List ) return (List<Object>)o;
        else return Collections.singletonList(o);
@ -305,7 +295,7 @@ public final class BCF2Utils {


    @Requires({"stream != null", "bytesForEachInt > 0"})
-    public final static int readInt(int bytesForEachInt, final InputStream stream) {
+    public static int readInt(int bytesForEachInt, final InputStream stream) {
        switch ( bytesForEachInt ) {
            case 1: {
                return (byte)(readByte(stream));
@ -323,7 +313,7 @@ public final class BCF2Utils {
        }
    }

-    public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
+    public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
        switch ( type.getSizeInBytes() ) {
            case 1:
                encodeStream.write(0xFF & value);
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java
@ -0,0 +1,80 @@
+package org.broadinstitute.sting.utils.codecs.bcf2;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+/**
+ * Simple holder for BCF version information
+ *
+ * User: depristo
+ * Date: 8/2/12
+ * Time: 2:16 PM
+ */
+public class BCFVersion {
+    /**
+     * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
+     * and m is the minor version, currently 1
+     */
+    public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
+
+    final int majorVersion;
+    final int minorVersion;
+
+    public BCFVersion(int majorVersion, int minorVersion) {
+        this.majorVersion = majorVersion;
+        this.minorVersion = minorVersion;
+    }
+
+    /**
+     * @return the major version number of this BCF file
+     */
+    public int getMajorVersion() {
+        return majorVersion;
+    }
+
+    /**
+     * @return the minor version number of this BCF file
+     */
+    public int getMinorVersion() {
+        return minorVersion;
+    }
+
+    /**
+     * Return a new BCFVersion object describing the major and minor version of the BCF file in stream
+     *
+     * Note that stream must be at the very start of the file.
+     *
+     * @param stream
+     * @return a BCFVersion object, or null if stream doesn't contain a BCF file
+     * @throws IOException
+     */
+    public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
+        final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
+        stream.read(magicBytes);
+        if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
+            // we're a BCF file
+            final int majorByte = stream.read();
+            final int minorByte = stream.read();
+            return new BCFVersion( majorByte, minorByte );
+        } else
+            return null;
+    }
+
+    /**
+     * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
+     * @param out
+     * @throws IOException
+     */
+    public void write(final OutputStream out) throws IOException {
+        out.write(MAGIC_HEADER_START);
+        out.write(getMajorVersion() & 0xFF);
+        out.write(getMinorVersion() & 0xFF);
+    }
+
+    @Override
+    public String toString() {
+        return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -316,9 +316,9 @@ public class UserException extends ReviewedStingException {



-    public static class MissingWalker extends UserException {
-        public MissingWalker(String walkerName, String message) {
-            super(String.format("Walker %s is not available: %s", walkerName, message));
+    public static class DeprecatedWalker extends UserException {
+        public DeprecatedWalker(String walkerName, String version) {
+            super(String.format("Walker %s is no longer available in the GATK; it has been deprecated since version %s", walkerName, version));
        }
    }

--- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java
@ -4,7 +4,7 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.recalibration;

 import net.sf.samtools.SAMTag;
 import net.sf.samtools.SAMUtils;
-import org.broadinstitute.sting.gatk.walkers.bqsr.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
@ -103,7 +103,7 @@ public class BaseRecalibration {
            }
        }

-        RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates);                                  // compute all covariates for the read
+        RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);                                  // compute all covariates for the read
        for (final EventType errorModel : EventType.values()) {                                                         // recalibrate all three quality strings
            if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
                read.setBaseQualities(null, errorModel);
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java
@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java
@ -1,11 +1,9 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

 import org.broadinstitute.sting.gatk.report.GATKReportTable;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
-import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

 import java.util.Arrays;
 import java.util.List;
@ -41,7 +39,7 @@ public class QuantizationInfo {
        for (final RecalDatum value : qualTable.getAllValues()) {
            final RecalDatum datum = value;
            final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality());                                 // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
-            qualHistogram[empiricalQual] += datum.numObservations;                                                      // add the number of observations for every key
+            qualHistogram[empiricalQual] += datum.getNumObservations();                                                      // add the number of observations for every key
        }
        empiricalQualCounts = Arrays.asList(qualHistogram);                                                             // histogram with the number of observations of the empirical qualities
        quantizeQualityScores(quantizationLevels);
@ -70,15 +68,15 @@ public class QuantizationInfo {
    }

    public GATKReportTable generateReportTable() {
-        GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
-        quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
-        quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
-        quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
+        GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
+        quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME);
+        quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
+        quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);

        for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
-            quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual);
-            quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
-            quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
+            quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
+            quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
+            quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
        }
        return quantizedTable;
    }
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java
@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

 /**
 * The object temporarily held by a read that describes all of it's covariates.
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java
@ -0,0 +1,305 @@
+package org.broadinstitute.sting.utils.recalibration;
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Invariant;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import java.util.Random;
+
+/**
+ * An individual piece of recalibration data. Each bin counts up the number of observations and the number
+ * of reference mismatches seen for that combination of covariates.
+ *
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: Nov 3, 2009
+ */
+@Invariant({
+        "estimatedQReported >= 0.0",
+        "! Double.isNaN(estimatedQReported)",
+        "! Double.isInfinite(estimatedQReported)",
+        "empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
+        "! Double.isNaN(empiricalQuality)",
+        "! Double.isInfinite(empiricalQuality)",
+        "numObservations >= 0",
+        "numMismatches >= 0",
+        "numMismatches <= numObservations"
+})
+public class RecalDatum {
+    private static final double UNINITIALIZED = -1.0;
+
+    /**
+     * estimated reported quality score based on combined data's individual q-reporteds and number of observations
+     */
+    private double estimatedQReported;
+
+    /**
+     * the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
+     */
+    private double empiricalQuality;
+
+    /**
+     * number of bases seen in total
+     */
+    private long numObservations;
+
+    /**
+     * number of bases seen that didn't match the reference
+     */
+    private long numMismatches;
+
+    /**
+     * used when calculating empirical qualities to avoid division by zero
+     */
+    private static final int SMOOTHING_CONSTANT = 1;
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // constructors
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Create a new RecalDatum with given observation and mismatch counts, and an reported quality
+     *
+     * @param _numObservations
+     * @param _numMismatches
+     * @param reportedQuality
+     */
+    public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
+        if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
+        if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
+        if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
+
+        numObservations = _numObservations;
+        numMismatches = _numMismatches;
+        estimatedQReported = reportedQuality;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    /**
+     * Copy copy into this recal datum, overwriting all of this objects data
+     * @param copy
+     */
+    public RecalDatum(final RecalDatum copy) {
+        this.numObservations = copy.getNumObservations();
+        this.numMismatches = copy.getNumMismatches();
+        this.estimatedQReported = copy.estimatedQReported;
+        this.empiricalQuality = copy.empiricalQuality;
+    }
+
+    /**
+     * Add in all of the data from other into this object, updating the reported quality from the expected
+     * error rate implied by the two reported qualities
+     *
+     * @param other
+     */
+    public synchronized void combine(final RecalDatum other) {
+        final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
+        increment(other.getNumObservations(), other.getNumMismatches());
+        estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations());
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    public synchronized void setEstimatedQReported(final double estimatedQReported) {
+        if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
+        if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
+        if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
+
+        this.estimatedQReported = estimatedQReported;
+    }
+
+    public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
+        final Random random = new Random();
+        final int nObservations = random.nextInt(maxObservations);
+        final int nErrors = random.nextInt(maxErrors);
+        final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
+        return new RecalDatum(nObservations, nErrors, (byte)qual);
+    }
+
+    public final double getEstimatedQReported() {
+        return estimatedQReported;
+    }
+    public final byte getEstimatedQReportedAsByte() {
+        return (byte)(int)(Math.round(getEstimatedQReported()));
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // Empirical quality score -- derived from the num mismatches and observations
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Returns the error rate (in real space) of this interval, or 0 if there are no obserations
+     * @return the empirical error rate ~= N errors / N obs
+     */
+    @Ensures("result >= 0.0")
+    public double getEmpiricalErrorRate() {
+        if ( numObservations == 0 )
+            return 0.0;
+        else {
+            // cache the value so we don't call log over and over again
+            final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
+            // smoothing is one error and one non-error observation, for example
+            final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT);
+            return doubleMismatches / doubleObservations;
+        }
+    }
+
+    public synchronized void setEmpiricalQuality(final double empiricalQuality) {
+        if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
+        if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
+        if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
+
+        this.empiricalQuality = empiricalQuality;
+    }
+
+    public final double getEmpiricalQuality() {
+        if (empiricalQuality == UNINITIALIZED)
+            calcEmpiricalQuality();
+        return empiricalQuality;
+    }
+
+    public final byte getEmpiricalQualityAsByte() {
+        return (byte)(Math.round(getEmpiricalQuality()));
+    }
+
+        //---------------------------------------------------------------------------------------------------------------
+    //
+    // increment methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    @Override
+    public String toString() {
+        return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality()));
+    }
+
+    public String stringForCSV() {
+        return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
+    }
+
+//    /**
+//     * We don't compare the estimated quality reported because it may be different when read from
+//     * report tables.
+//     *
+//     * @param o the other recal datum
+//     * @return true if the two recal datums have the same number of observations, errors and empirical quality.
+//     */
+//    @Override
+//    public boolean equals(Object o) {
+//        if (!(o instanceof RecalDatum))
+//            return false;
+//        RecalDatum other = (RecalDatum) o;
+//        return super.equals(o) &&
+//               MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
+//    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // increment methods
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public long getNumObservations() {
+        return numObservations;
+    }
+
+    public synchronized void setNumObservations(final long numObservations) {
+        if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
+        this.numObservations = numObservations;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    public long getNumMismatches() {
+        return numMismatches;
+    }
+
+    @Requires({"numMismatches >= 0"})
+    public synchronized void setNumMismatches(final long numMismatches) {
+        if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
+        this.numMismatches = numMismatches;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"by >= 0"})
+    public synchronized void incrementNumObservations(final long by) {
+        numObservations += by;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"by >= 0"})
+    public synchronized void incrementNumMismatches(final long by) {
+        numMismatches += by;
+        empiricalQuality = UNINITIALIZED;
+    }
+
+    @Requires({"incObservations >= 0", "incMismatches >= 0"})
+    @Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
+    public synchronized void increment(final long incObservations, final long incMismatches) {
+        incrementNumObservations(incObservations);
+        incrementNumMismatches(incMismatches);
+    }
+
+    @Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
+    public synchronized void increment(final boolean isError) {
+        incrementNumObservations(1);
+        if ( isError )
+            incrementNumMismatches(1);
+    }
+
+    // -------------------------------------------------------------------------------------
+    //
+    // Private implementation helper functions
+    //
+    // -------------------------------------------------------------------------------------
+
+    /**
+     * Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
+     */
+    @Requires("empiricalQuality == UNINITIALIZED")
+    @Ensures("empiricalQuality != UNINITIALIZED")
+    private synchronized final void calcEmpiricalQuality() {
+        final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
+        empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
+    }
+
+    /**
+     * calculate the expected number of errors given the estimated Q reported and the number of observations
+     * in this datum.
+     *
+     * @return a positive (potentially fractional) estimate of the number of errors
+     */
+    @Ensures("result >= 0.0")
+    private double calcExpectedErrors() {
+        return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java
@ -0,0 +1,309 @@
+package org.broadinstitute.sting.utils.recalibration;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one
+ *
+ * @author Mark DePristo
+ * @since 07/27/12
+ */
+public class RecalDatumNode<T extends RecalDatum> {
+    private final static boolean USE_CHI2 = true;
+    protected static Logger logger = Logger.getLogger(RecalDatumNode.class);
+    private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY;
+    private final T recalDatum;
+    private double fixedPenalty = UNINITIALIZED;
+    private final Set<RecalDatumNode<T>> subnodes;
+
+    public RecalDatumNode(final T recalDatum) {
+        this(recalDatum, new HashSet<RecalDatumNode<T>>());
+    }
+
+    @Override
+    public String toString() {
+        return recalDatum.toString();
+    }
+
+    public RecalDatumNode(final T recalDatum, final Set<RecalDatumNode<T>> subnodes) {
+        this(recalDatum, UNINITIALIZED, subnodes);
+    }
+
+    protected RecalDatumNode(final T recalDatum, final double fixedPenalty) {
+        this(recalDatum, fixedPenalty, new HashSet<RecalDatumNode<T>>());
+    }
+
+    protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set<RecalDatumNode<T>> subnodes) {
+        this.recalDatum = recalDatum;
+        this.fixedPenalty = fixedPenalty;
+        this.subnodes = new HashSet<RecalDatumNode<T>>(subnodes);
+    }
+
+    public T getRecalDatum() {
+        return recalDatum;
+    }
+
+    public Set<RecalDatumNode<T>> getSubnodes() {
+        return subnodes;
+    }
+
+    public double getPenalty() {
+        if ( fixedPenalty != UNINITIALIZED )
+            return fixedPenalty;
+        else
+            return calcPenalty();
+    }
+
+    public double calcAndSetFixedPenalty(final boolean doEntireTree) {
+        fixedPenalty = calcPenalty();
+        if ( doEntireTree )
+            for ( final RecalDatumNode<T> sub : subnodes )
+                sub.calcAndSetFixedPenalty(doEntireTree);
+        return fixedPenalty;
+    }
+
+    public void addSubnode(final RecalDatumNode<T> sub) {
+        subnodes.add(sub);
+    }
+
+    public boolean isLeaf() {
+        return subnodes.isEmpty();
+    }
+
+    public int getNumBranches() {
+        return subnodes.size();
+    }
+
+    /**
+     * Total penalty is the sum of leaf node penalties
+     *
+     * This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by
+     * definition have 0 penalty unless they represent a pruned tree with underlying -- but now
+     * pruned -- subtrees
+     *
+     * @return
+     */
+    public double totalPenalty() {
+        if ( isLeaf() )
+            return getPenalty();
+        else {
+            double sum = 0.0;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                sum += sub.totalPenalty();
+            return sum;
+        }
+    }
+
+    public int maxDepth() {
+        int subMax = 0;
+        for ( final RecalDatumNode<T> sub : subnodes )
+            subMax = Math.max(subMax, sub.maxDepth());
+        return subMax + 1;
+    }
+
+    public int minDepth() {
+        if ( isLeaf() )
+            return 1;
+        else {
+            int subMin = Integer.MAX_VALUE;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                subMin = Math.min(subMin, sub.minDepth());
+            return subMin + 1;
+        }
+    }
+
+    public int size() {
+        int size = 1;
+        for ( final RecalDatumNode<T> sub : subnodes )
+            size += sub.size();
+        return size;
+    }
+
+    public int numLeaves() {
+        if ( isLeaf() )
+            return 1;
+        else {
+            int size = 0;
+            for ( final RecalDatumNode<T> sub : subnodes )
+                size += sub.numLeaves();
+            return size;
+        }
+    }
+
+    private double calcPenalty() {
+        if ( USE_CHI2 )
+            return calcPenaltyChi2();
+        else
+            return calcPenaltyLog10(getRecalDatum().getEmpiricalErrorRate());
+    }
+
+    private double calcPenaltyChi2() {
+        if ( isLeaf() )
+            return 0.0;
+        else {
+            final long[][] counts = new long[subnodes.size()][2];
+
+            int i = 0;
+            for ( RecalDatumNode<T> subnode : subnodes ) {
+                counts[i][0] = subnode.getRecalDatum().getNumMismatches();
+                counts[i][1] = subnode.getRecalDatum().getNumObservations();
+                i++;
+            }
+
+            final double chi2 = new ChiSquareTestImpl().chiSquare(counts);
+
+//            StringBuilder x = new StringBuilder();
+//            StringBuilder y = new StringBuilder();
+//            for ( int k = 0; k < counts.length; k++) {
+//                if ( k != 0 ) {
+//                    x.append(", ");
+//                    y.append(", ");
+//                }
+//                x.append(counts[k][0]);
+//                y.append(counts[k][1]);
+//            }
+//            logger.info("x = c(" + x.toString() + ")");
+//            logger.info("y = c(" + y.toString() + ")");
+//            logger.info("chi2 = " + chi2);
+
+            return chi2;
+            //return Math.log10(chi2);
+        }
+    }
+
+    /**
+     * Calculate the penalty of this interval, given the overall error rate for the interval
+     *
+     * If the globalErrorRate is e, this value is:
+     *
+     * sum_i |log10(e_i) - log10(e)| * nObservations_i
+     *
+     * each the index i applies to all leaves of the tree accessible from this interval
+     * (found recursively from subnodes as necessary)
+     *
+     * @param globalErrorRate overall error rate in real space against which we calculate the penalty
+     * @return the cost of approximating the bins in this interval with the globalErrorRate
+     */
+    @Requires("globalErrorRate >= 0.0")
+    @Ensures("result >= 0.0")
+    private double calcPenaltyLog10(final double globalErrorRate) {
+        if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
+            return 0.0;
+
+        if ( isLeaf() ) {
+            // this is leave node
+            return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
+            // TODO -- how we can generalize this calculation?
+//            if ( this.qEnd <= minInterestingQual )
+//                // It's free to merge up quality scores below the smallest interesting one
+//                return 0;
+//            else {
+//                return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations();
+//            }
+        } else {
+            double sum = 0;
+            for ( final RecalDatumNode<T> hrd : subnodes)
+                sum += hrd.calcPenaltyLog10(globalErrorRate);
+            return sum;
+        }
+    }
+
+    public RecalDatumNode<T> pruneToDepth(final int maxDepth) {
+        if ( maxDepth < 1 )
+            throw new IllegalArgumentException("maxDepth < 1");
+        else {
+            final Set<RecalDatumNode<T>> subPruned = new HashSet<RecalDatumNode<T>>(getNumBranches());
+            if ( maxDepth > 1 )
+                for ( final RecalDatumNode<T> sub : subnodes )
+                    subPruned.add(sub.pruneToDepth(maxDepth - 1));
+            return new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, subPruned);
+        }
+    }
+
+    public RecalDatumNode<T> pruneByPenalty(final int maxElements) {
+        RecalDatumNode<T> root = this;
+
+        while ( root.size() > maxElements ) {
+            // remove the lowest penalty element, and continue
+            root = root.removeLowestPenaltyNode();
+        }
+
+        // our size is below the target, so we are good, return
+        return root;
+    }
+
+    /**
+     * Find the lowest penalty node in the tree, and return a tree without it
+     *
+     * Note this excludes the current (root) node
+     *
+     * @return
+     */
+    private RecalDatumNode<T> removeLowestPenaltyNode() {
+        final Pair<RecalDatumNode<T>, Double> nodeToRemove = getMinPenaltyNode();
+        logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond());
+
+        final Pair<RecalDatumNode<T>, Boolean> result = removeNode(nodeToRemove.getFirst());
+
+        if ( ! result.getSecond() )
+            throw new IllegalStateException("Never removed any node!");
+
+        final RecalDatumNode<T> oneRemoved = result.getFirst();
+        if ( oneRemoved == null )
+            throw new IllegalStateException("Removed our root node, wow, didn't expect that");
+        return oneRemoved;
+    }
+
+    private Pair<RecalDatumNode<T>, Double> getMinPenaltyNode() {
+        final double myValue = isLeaf() ? Double.MAX_VALUE : getPenalty();
+        Pair<RecalDatumNode<T>, Double> maxNode = new Pair<RecalDatumNode<T>, Double>(this, myValue);
+
+        for ( final RecalDatumNode<T> sub : subnodes ) {
+            final Pair<RecalDatumNode<T>, Double> subFind = sub.getMinPenaltyNode();
+            if ( subFind.getSecond() < maxNode.getSecond() ) {
+                maxNode = subFind;
+            }
+        }
+
+        return maxNode;
+    }
+
+    private Pair<RecalDatumNode<T>, Boolean> removeNode(final RecalDatumNode<T> nodeToRemove) {
+        if ( this == nodeToRemove ) {
+            if ( isLeaf() )
+                throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove);
+            // node is the thing we are going to remove, but without any subnodes
+            final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty);
+            return new Pair<RecalDatumNode<T>, Boolean>(node, true);
+        } else {
+            // did we remove something in a sub branch?
+            boolean removedSomething = false;
+
+            // our sub nodes with the penalty node removed
+            final Set<RecalDatumNode<T>> sub = new HashSet<RecalDatumNode<T>>(getNumBranches());
+
+            for ( final RecalDatumNode<T> sub1 : subnodes ) {
+                if ( removedSomething ) {
+                    // already removed something, just add sub1 back to sub
+                    sub.add(sub1);
+                } else {
+                    // haven't removed anything yet, so try
+                    final Pair<RecalDatumNode<T>, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove);
+                    removedSomething = maybeRemoved.getSecond();
+                    sub.add(maybeRemoved.getFirst());
+                }
+            }
+
+            final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, sub);
+            return new Pair<RecalDatumNode<T>, Boolean>(node, removedSomething);
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java
@ -23,11 +23,13 @@
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.R.RScriptExecutor;
 import org.broadinstitute.sting.utils.Utils;
@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.io.Resource;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -59,7 +60,7 @@ import java.util.*;
 * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration.
 */

-public class RecalDataManager {
+public class RecalUtils {
    public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments";
    public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized";
    public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0";
@ -85,13 +86,108 @@ public class RecalDataManager {

    private static final String SCRIPT_FILE = "BQSR.R";

-    private static final Pair<String, String> covariateValue     = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
-    private static final Pair<String, String> covariateName      = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
-    private static final Pair<String, String> eventType          = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
-    private static final Pair<String, String> empiricalQuality   = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
-    private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
-    private static final Pair<String, String> nObservations      = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
-    private static final Pair<String, String> nErrors            = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
+    private static final Pair<String, String> covariateValue     = new Pair<String, String>(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s");
+    private static final Pair<String, String> covariateName      = new Pair<String, String>(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s");
+    private static final Pair<String, String> eventType          = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
+    private static final Pair<String, String> empiricalQuality   = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
+    private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
+    private static final Pair<String, String> nObservations      = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
+    private static final Pair<String, String> nErrors            = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d");
+
+    /**
+     * Generates two lists : required covariates and optional covariates based on the user's requests.
+     *
+     * Performs the following tasks in order:
+     *  1. Adds all requierd covariates in order
+     *  2. Check if the user asked to use the standard covariates and adds them all if that's the case
+     *  3. Adds all covariates requested by the user that were not already added by the two previous steps
+     *
+     * @param argumentCollection the argument collection object for the recalibration walker
+     * @return a pair of ordered lists : required covariates (first) and optional covariates (second)
+     */
+    public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
+        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
+        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
+        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
+
+        final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses);                   // add the required covariates
+        ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
+        if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
+            optionalCovariates = addStandardCovariatesToList(standardClasses);                                          // add the standard covariates if -standard was specified by the user
+
+        if (argumentCollection.COVARIATES != null) {                                                                    // parse the -cov arguments that were provided, skipping over the ones already specified
+            for (String requestedCovariateString : argumentCollection.COVARIATES) {
+                boolean foundClass = false;
+                for (Class<? extends Covariate> covClass : covariateClasses) {
+                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) {                          // -cov argument matches the class name for an implementing class
+                        foundClass = true;
+                        if (!requiredClasses.contains(covClass) &&
+                                (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
+                            try {
+                                final Covariate covariate = covClass.newInstance();                                     // now that we've found a matching class, try to instantiate it
+                                optionalCovariates.add(covariate);
+                            } catch (Exception e) {
+                                throw new DynamicClassResolutionException(covClass, e);
+                            }
+                        }
+                    }
+                }
+
+                if (!foundClass) {
+                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
+                }
+            }
+        }
+        return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
+    }
+
+    /**
+     * Adds the required covariates to a covariate list
+     *
+     * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
+     *
+     * @param classes list of classes to add to the covariate list
+     * @return the covariate list
+     */
+    private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
+        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
+        if (classes.size() != 2)
+            throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
+
+        dest.add(new ReadGroupCovariate());                                                                             // enforce the order with RG first and QS next.
+        dest.add(new QualityScoreCovariate());
+        return dest;
+    }
+
+    /**
+     * Adds the standard covariates to a covariate list
+     *
+     * @param classes list of classes to add to the covariate list
+     * @return the covariate list
+     */
+    private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
+        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
+        for (Class<?> covClass : classes) {
+            try {
+                final Covariate covariate = (Covariate) covClass.newInstance();
+                dest.add(covariate);
+            } catch (Exception e) {
+                throw new DynamicClassResolutionException(covClass, e);
+            }
+        }
+        return dest;
+    }
+
+    public static void listAvailableCovariates(Logger logger) {
+        // Get a list of all available covariates
+        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
+
+        // Print and exit if that's what was requested
+        logger.info("Available covariates:");
+        for (Class<?> covClass : covariateClasses)
+            logger.info(covClass.getSimpleName());
+        logger.info("");
+    }


    public enum SOLID_RECAL_MODE {
@ -152,64 +248,6 @@ public class RecalDataManager {
        }
    }

-    /**
-     * Generates two lists : required covariates and optional covariates based on the user's requests.
-     *
-     * Performs the following tasks in order:
-     *  1. Adds all requierd covariates in order
-     *  2. Check if the user asked to use the standard covariates and adds them all if that's the case
-     *  3. Adds all covariates requested by the user that were not already added by the two previous steps
-     *
-     * @param argumentCollection the argument collection object for the recalibration walker
-     * @return a pair of ordered lists : required covariates (first) and optional covariates (second)
-     */
-    public static Pair<ArrayList<Covariate>, ArrayList<Covariate>> initializeCovariates(RecalibrationArgumentCollection argumentCollection) {
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
-        final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
-        final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
-
-        final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses);                   // add the required covariates
-        ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
-        if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES)
-            optionalCovariates = addStandardCovariatesToList(standardClasses);                                          // add the standard covariates if -standard was specified by the user
-
-        if (argumentCollection.COVARIATES != null) {                                                                    // parse the -cov arguments that were provided, skipping over the ones already specified
-            for (String requestedCovariateString : argumentCollection.COVARIATES) {
-                boolean foundClass = false;
-                for (Class<? extends Covariate> covClass : covariateClasses) {
-                    if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) {                          // -cov argument matches the class name for an implementing class
-                        foundClass = true;
-                        if (!requiredClasses.contains(covClass) &&
-                                (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) {
-                            try {
-                                final Covariate covariate = covClass.newInstance();                                     // now that we've found a matching class, try to instantiate it
-                                optionalCovariates.add(covariate);
-                            } catch (Exception e) {
-                                throw new DynamicClassResolutionException(covClass, e);
-                            }
-                        }
-                    }
-                }
-
-                if (!foundClass) {
-                    throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates.");
-                }
-            }
-        }
-        return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
-    }
-
-    public static void listAvailableCovariates(Logger logger) {
-        // Get a list of all available covariates
-        final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
-
-        // Print and exit if that's what was requested
-        logger.info("Available covariates:");
-        for (Class<?> covClass : covariateClasses)
-            logger.info(covClass.getSimpleName());
-        logger.info("");
-    }
-
    private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
        List<GATKReportTable> result = new LinkedList<GATKReportTable>();
        int reportTableIndex = 0;
@ -272,8 +310,8 @@ public class RecalDataManager {
                reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
                if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
                    reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported());   // we only add the estimated Q reported in the RG table
-                reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
-                reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
+                reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
+                reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());

                rowIndex++;
            }
@ -320,7 +358,7 @@ public class RecalDataManager {
        files.getFirst().close();

        final RScriptExecutor executor = new RScriptExecutor();
-        executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
+        executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class));
        executor.addArgs(csvFileName.getAbsolutePath());
        executor.addArgs(plotFileName.getAbsolutePath());
        executor.exec();
@ -480,14 +518,14 @@ public class RecalDataManager {
     */
    public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
        if (ReadUtils.isSOLiDRead(read)) {                                                                              // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
-            if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) {                            // Haven't calculated the inconsistency array yet for this read
-                final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
+            if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) {                            // Haven't calculated the inconsistency array yet for this read
+                final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG);
                if (attr != null) {
                    byte[] colorSpace;
                    if (attr instanceof String)
                        colorSpace = ((String) attr).getBytes();
                    else
-                        throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
+                        throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
                    
                    byte[] readBases = read.getReadBases();                                                             // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
                    if (read.getReadNegativeStrandFlag())
@ -501,7 +539,7 @@ public class RecalDataManager {
                        inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1);
                        prevBase = readBases[i];
                    }
-                    read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
+                    read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
                }
                else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION)                                             // if the strategy calls for an exception, throw it
                    throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
@ -545,7 +583,7 @@ public class RecalDataManager {
     * @return Returns true if the base was inconsistent with the color space
     */
    public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
-        final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
+        final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG);
        if (attr != null) {
            final byte[] inconsistency = (byte[]) attr;
            // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
@ -691,40 +729,4 @@ public class RecalDataManager {
    }


-    /**
-     * Adds the required covariates to a covariate list
-     *
-     * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand.
-     *
-     * @param classes list of classes to add to the covariate list
-     * @return the covariate list
-     */
-    private static ArrayList<Covariate> addRequiredCovariatesToList(List<Class<? extends RequiredCovariate>> classes) {
-        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
-        if (classes.size() != 2)
-            throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected");
-
-        dest.add(new ReadGroupCovariate());                                                                             // enforce the order with RG first and QS next.
-        dest.add(new QualityScoreCovariate());
-        return dest;
-    }
-
-    /**
-     * Adds the standard covariates to a covariate list
-     *
-     * @param classes list of classes to add to the covariate list
-     * @return the covariate list
-     */
-    private static ArrayList<Covariate> addStandardCovariatesToList(List<Class<? extends StandardCovariate>> classes) {
-        ArrayList<Covariate> dest = new ArrayList<Covariate>(classes.size());
-        for (Class<?> covClass : classes) {
-            try {
-                final Covariate covariate = (Covariate) covClass.newInstance();
-                dest.add(covariate);
-            } catch (Exception e) {
-                throw new DynamicClassResolutionException(covClass, e);
-            }
-        }
-        return dest;
-    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java
@ -1,11 +1,12 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.gatk.walkers.bqsr.*;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

 import java.io.File;
 import java.io.PrintStream;
@ -33,13 +34,13 @@ public class RecalibrationReport {
    public RecalibrationReport(final File RECAL_FILE) {
        final GATKReport report = new GATKReport(RECAL_FILE);

-        argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
+        argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
        RAC = initializeArgumentCollectionTable(argumentTable);

-        GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
+        GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
        quantizationInfo = initializeQuantizationTable(quantizedTable);

-        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC);       // initialize the required and optional covariates
+        Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC);       // initialize the required and optional covariates
        ArrayList<Covariate> requiredCovariates = covariates.getFirst();
        ArrayList<Covariate> optionalCovariates = covariates.getSecond();
        requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
@ -57,13 +58,13 @@ public class RecalibrationReport {
        for (Covariate cov : requestedCovariates)
            cov.initialize(RAC);                                                                                        // initialize any covariate member variables using the shared argument collection

-        recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
+        recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));

-        parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
+        parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));

-        parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
+        parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));

-        parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
+        parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);

    }

@ -85,7 +86,7 @@ public class RecalibrationReport {
    private int countReadGroups(final GATKReportTable reportTable) {
        Set<String> readGroups = new HashSet<String>();
        for ( int i = 0; i < reportTable.getNumRows(); i++ )
-            readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
+            readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
        return readGroups.size();
    }

@ -139,17 +140,17 @@ public class RecalibrationReport {
 \     */
    private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
        for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
            tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
+            final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
            tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);

-            final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
+            final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
            final int covIndex = optionalCovariateIndexes.get(covName);
-            final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
+            final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
            tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);

-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
            tempCOVarray[3] = event.index;

            recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
@ -164,11 +165,11 @@ public class RecalibrationReport {
     */
    private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> qualTable) {
        for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
            tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
+            final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
            tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
            tempQUALarray[2] = event.index;

            qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
@ -183,9 +184,9 @@ public class RecalibrationReport {
     */
    private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
        for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
-            final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
+            final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
            tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
-            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
+            final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
            tempRGarray[1] = event.index;

            rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
@ -193,13 +194,13 @@ public class RecalibrationReport {
    }

    private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
-        final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
-        final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
-        final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
+        final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
+        final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
+        final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);

        final double estimatedQReported = hasEstimatedQReportedColumn ?                                                 // the estimatedQreported column only exists in the ReadGroup table
-                (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) :                      // we get it if we are in the read group table
-                Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME));              // or we use the reported quality if we are in any other table
+                (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) :                      // we get it if we are in the read group table
+                Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME));              // or we use the reported quality if we are in any other table

        final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
        datum.setEstimatedQReported(estimatedQReported);
@ -218,8 +219,8 @@ public class RecalibrationReport {
        final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
        for ( int i = 0; i < table.getNumRows(); i++ ) {
            final byte originalQual = (byte)i;
-            final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
-            final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
+            final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
+            final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
            final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
            final long quantizedCount = Long.parseLong(countObject.toString());
            quals[originalQual] = quantizedQual;
@ -239,7 +240,7 @@ public class RecalibrationReport {

        for ( int i = 0; i < table.getNumRows(); i++ ) {
            final String argument = table.get(i, "Argument").toString();
-            Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+            Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
            if (value.equals("null"))
                value = null;                                                                                           // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport

@ -250,10 +251,10 @@ public class RecalibrationReport {
                RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);

            else if (argument.equals("solid_recal_mode"))
-                RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
+                RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value);

            else if (argument.equals("solid_nocall_strategy"))
-                RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
+                RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);

            else if (argument.equals("mismatches_context_size"))
                RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
@ -307,7 +308,7 @@ public class RecalibrationReport {
    }

    public void output(PrintStream output) {
-        RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
+        RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
    }

    public RecalibrationArgumentCollection getRAC() {
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java
@ -25,9 +25,7 @@

 package org.broadinstitute.sting.utils.recalibration;

-import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
-import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;

 /**
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java
@ -23,8 +23,10 @@
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

 /*
@ -89,8 +91,3 @@ public interface Covariate {
    public int maximumKeyValue();
 }

-interface RequiredCovariate extends Covariate {}
-
-interface StandardCovariate extends Covariate {}
-
-interface ExperimentalCovariate extends Covariate {}
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.UserException;
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java
@ -0,0 +1,30 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * [Short one sentence description of this walker]
+ * <p/>
+ * <p>
+ * [Functionality of this walker]
+ * </p>
+ * <p/>
+ * <h2>Input</h2>
+ * <p>
+ * [Input description]
+ * </p>
+ * <p/>
+ * <h2>Output</h2>
+ * <p>
+ * [Output description]
+ * </p>
+ * <p/>
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  </pre>
+ *
+ * @author Your Name
+ * @since Date created
+ */
+public interface ExperimentalCovariate extends Covariate {}
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration.covariates;

+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java
@ -0,0 +1,30 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * [Short one sentence description of this walker]
+ * <p/>
+ * <p>
+ * [Functionality of this walker]
+ * </p>
+ * <p/>
+ * <h2>Input</h2>
+ * <p>
+ * [Input description]
+ * </p>
+ * <p/>
+ * <h2>Output</h2>
+ * <p>
+ * [Output description]
+ * </p>
+ * <p/>
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  </pre>
+ *
+ * @author Your Name
+ * @since Date created
+ */
+public interface RequiredCovariate extends Covariate {}
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java
@ -0,0 +1,30 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+/**
+ * [Short one sentence description of this walker]
+ * <p/>
+ * <p>
+ * [Functionality of this walker]
+ * </p>
+ * <p/>
+ * <h2>Input</h2>
+ * <p>
+ * [Input description]
+ * </p>
+ * <p/>
+ * <h2>Output</h2>
+ * <p>
+ * [Output description]
+ * </p>
+ * <p/>
+ * <h2>Examples</h2>
+ * <pre>
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  </pre>
+ *
+ * @author Your Name
+ * @since Date created
+ */
+public interface StandardCovariate extends Covariate {}
--- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java
@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.recalibration.EventType;

 import java.util.ArrayList;
 import java.util.Arrays;
@ -476,7 +476,6 @@ public class AlignmentUtils {
                    }
                    break;
                case D:
-                case N:
                    if (!isDeletion) {
                        alignmentPos += elementLength;
                    } else {
@ -498,6 +497,7 @@ public class AlignmentUtils {
                    break;
                case H:
                case P:
+                case N:
                    break;
                default:
                    throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
@ -516,16 +516,13 @@ public class AlignmentUtils {
            final int elementLength = ce.getLength();

            switch (ce.getOperator()) {
-                case I:
-                case S:
-                    break;
                case D:
                case N:
-                    alignmentLength += elementLength;
-                    break;
                case M:
                    alignmentLength += elementLength;
                    break;
+                case I:
+                case S:
                case H:
                case P:
                    break;
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@ -25,7 +25,7 @@
 package org.broadinstitute.sting.utils.sam;

 import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
+import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.NGSPlatform;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
     * in this VC is returned as the set of alleles in the subContext, even if
     * some of those alleles aren't in the samples
     *
+     * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
+     *
     * @param sampleNames    the sample names
-     * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
+     * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
     * @return new VariantContext subsetting to just the given samples
     */
    public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
-        if ( sampleNames.containsAll(getSampleNames()) ) {
+        if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
            return this; // fast path when you don't have any work to do
        } else {
            VariantContextBuilder builder = new VariantContextBuilder(this);
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
        }
    }

+    /**
+     * @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
+     *
+     * @param sampleNames
+     * @return
+     */
+    public VariantContext subContextFromSamples(final Set<String> sampleNames) {
+        return subContextFromSamples(sampleNames, true);
+    }
+
    public VariantContext subContextFromSample(String sampleName) {
-        return subContextFromSamples(Collections.singleton(sampleName), true);
+        return subContextFromSamples(Collections.singleton(sampleName));
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
 import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
+import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@ -83,6 +84,9 @@ import java.util.*;
 * @since 06/12
 */
 class BCF2Writer extends IndexingVariantContextWriter {
+    public static final int MAJOR_VERSION = 2;
+    public static final int MINOR_VERSION = 1;
+
    /**
     * If true, we will write out the undecoded raw bytes for a genotypes block, if it
     * is found in the input VC.  This can be very dangerous as the genotype encoding
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
            writer.close();

            final byte[] headerBytes = capture.toByteArray();
-            outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
+            new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
            BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
            outputStream.write(headerBytes);
        } catch (IOException e) {
--- a/public/java/test/org/broadinstitute/sting/BaseTest.java
+++ b/public/java/test/org/broadinstitute/sting/BaseTest.java
@ -282,12 +282,12 @@ public abstract class BaseTest {
    private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;

    public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
-        Assert.assertTrue(actual instanceof Double);
+        Assert.assertTrue(actual instanceof Double, "Not a double");
        assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
    }

    public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
-        Assert.assertTrue(actual instanceof Double);
+        Assert.assertTrue(actual instanceof Double, "Not a double");
        assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
    }

@ -303,13 +303,13 @@ public abstract class BaseTest {

    public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
        if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
-            Assert.assertTrue(Double.isNaN(actual));
+            Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
        else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
-            Assert.assertTrue(Double.isInfinite(actual));
+            Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
        else {
            final double delta = Math.abs(actual - expected);
            final double ratio = Math.abs(actual / expected - 1.0);
-            Assert.assertTrue(delta < tolerance || ratio < tolerance);
+            Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
        }
    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
@ -24,9 +24,12 @@

 package org.broadinstitute.sting.gatk.datasources.reads;

+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
 import static org.testng.Assert.fail;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMProgramRecord;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.commandline.Tags;
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
 import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.testng.annotations.AfterMethod;
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
            fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
        }
    }
+
+    /** Test that we clear program records when requested */
+    @Test
+    public void testRemoveProgramRecords() {
+        logger.warn("Executing testRemoveProgramRecords");
+
+        // setup the data
+        readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
+
+        // use defaults
+        SAMDataSource data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false);
+
+        List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
+        assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
+
+        boolean removeProgramRecords = false;
+        data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false,
+                BAQ.CalculationMode.OFF,
+                BAQ.QualityMode.DONT_MODIFY,
+                null, // no BAQ
+                null, // no BQSR
+                (byte) -1,
+                removeProgramRecords);
+
+        List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
+        assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
+
+        removeProgramRecords = true;
+        data = new SAMDataSource(readers,
+                new ThreadAllocation(),
+                null,
+                genomeLocParser,
+                false,
+                SAMFileReader.ValidationStringency.SILENT,
+                null,
+                null,
+                new ValidationExclusion(),
+                new ArrayList<ReadFilter>(),
+                false,
+                BAQ.CalculationMode.OFF,
+                BAQ.QualityMode.DONT_MODIFY,
+                null, // no BAQ
+                null, // no BQSR
+                (byte) -1,
+                removeProgramRecords);
+
+        List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
+        assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
+    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;

 import org.broadinstitute.sting.gatk.report.GATKReport;
 import org.broadinstitute.sting.gatk.report.GATKReportTable;
+import org.broadinstitute.sting.utils.recalibration.RecalUtils;
 import org.testng.Assert;
 import org.testng.annotations.Test;

@ -33,15 +34,15 @@ public class BQSRGathererUnitTest {
        for (GATKReportTable originalTable : originalReport.getTables()) {
            GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
            List<String> columnsToTest = new LinkedList<String>();
-            columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
-            columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
-            if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) {                    // these tables must be IDENTICAL
-                columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
+            columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
+            columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
+            if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) {                    // these tables must be IDENTICAL
+                columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
                testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
            }
            
-            else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) {
-                columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
+            else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) {
+                columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
                testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
            }
            
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
@ -38,13 +38,17 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;

 import java.util.*;


 public class ArtificialReadPileupTestProvider {
+    final String refBases = "ACAGAGCTGACCCTCCCTCCCCTCTCCCAGTGCAACAGCACGGGCGGCGACTGCTTTTACCGAGGCTACACGTCAGGCGTGGCGGCTGTCCAGGACTGGTACCACTTCCACTATGTGGATCTCTGCTGAGGACCAGGAAAGCCAGCACCCGCAGAGACTCTTCCCCAGTGCTCCATACGATCACCATTCTCTGCAGAAGGTCAGACGTCACTGGTGGCCCCCCAGCCTCCTCAGCAGGGAAGGATACTGTCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGCCTTGGACCAGCTGGTGCAGTTTGCCTACACGGCTGAGATTGTGGTGGGCGAGGGC";
    final int contigStart = 1;
-    final int contigStop = 10;
+    final int contigStop = refBases.length();
    final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
 //    final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
    final String artificialContig = "chr1";
@ -54,16 +58,18 @@ public class ArtificialReadPileupTestProvider {
    final int artificialMappingQuality = 60;
    Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
    List<SAMReadGroupRecord> sampleRGs;
-
-    final String refBases = "AGGATACTGT";
    List<String> sampleNames = new ArrayList<String>();
    private String sampleName(int i) { return sampleNames.get(i); }
    private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
-    public final int offset = 5;
+    public final int locStart = 105; // start position where we desire artificial variant
+    private final int readLength = 10; // desired read length in pileup
+    public final int readOffset = 4;
+    private final int readStart = locStart - readOffset;
    public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
-    public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
-    public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
-    public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
+    public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart);
+    public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,locStart-100,locStart+100);
+    public final String windowBases = refBases.substring(locStart-100-1,locStart+100);
+    public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,windowBases.getBytes());

    byte BASE_QUAL = 50;

@ -96,25 +102,28 @@ public class ArtificialReadPileupTestProvider {
    public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
        return getAlignmentContextFromAlleles(eventLength, altBases, numReadsPerAllele, false, BASE_QUAL);
    }
-    public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele,
-                                                                       boolean addBaseErrors, int phredScaledBaseErrorRate) {
-        //    RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
+    public Map<String,AlignmentContext> getAlignmentContextFromAlleles(final int eventLength,
+                                                                       final String altBases,
+                                                                       final int[] numReadsPerAllele,
+                                                                       final boolean addBaseErrors,
+                                                                       final int phredScaledBaseErrorRate) {
+        final String refChar = new String(new byte[]{referenceContext.getBase()});

        String refAllele, altAllele;
        if (eventLength == 0)  {
            // SNP case
-            refAllele = new String(new byte[]{referenceContext.getBase()});
-            altAllele = new String(altBases.substring(0,1));
+            refAllele = refChar;
+            altAllele = altBases.substring(0,1);

        } else if (eventLength>0){
            // insertion
-            refAllele = "";
-            altAllele = altBases.substring(0,eventLength);
+            refAllele = refChar;
+            altAllele = refChar+altBases/*.substring(0,eventLength)*/;
        }
        else {
            // deletion
-            refAllele = refBases.substring(offset,offset+Math.abs(eventLength));
-            altAllele = "";
+            refAllele = new String(referenceContext.getForwardBases()).substring(0,Math.abs(eventLength)+1);
+            altAllele = refChar;
        }

        Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
@ -138,22 +147,21 @@ public class ArtificialReadPileupTestProvider {
    private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases,
                                                    int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) {
        List<PileupElement> pileupElements = new ArrayList<PileupElement>();
-        int offset = (contigStop-contigStart+1)/2;
-        int refAlleleLength = refAllele.length();
+        final int refAlleleLength = refAllele.length();

-        pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
-        pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
+        pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true));
+        pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, readStart, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false));
        return new ReadBackedPileupImpl(loc,pileupElements);
    }

-    private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {
+    private List<PileupElement> createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) {

        int alleleLength = allele.length();
        List<PileupElement> pileupElements = new ArrayList<PileupElement>();

        int readCounter = 0;
        for ( int d = 0; d < numReadsPerAllele; d++ ) {
-            byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
+            byte[] readBases = trueHaplotype(allele, refAlleleLength, readLength);
            if (addErrors)
                addBaseErrors(readBases, phredScaledErrorRate);

@ -165,20 +173,20 @@ public class ArtificialReadPileupTestProvider {
            read.setReadBases(readBases);
            read.setReadName(artificialReadName+readCounter++);

-            boolean isBeforeDeletion = false, isBeforeInsertion = false;
+            boolean isBeforeDeletion = alleleLength<refAlleleLength;
+            boolean isBeforeInsertion = alleleLength>refAlleleLength;
+
+            int eventLength = alleleLength - refAlleleLength;
            if (isReference)
                read.setCigarString(readBases.length + "M");
            else {
-                isBeforeDeletion = alleleLength<refAlleleLength;
-                isBeforeInsertion = alleleLength>refAlleleLength;
                if (isBeforeDeletion || isBeforeInsertion)
-                    read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
-                            (readBases.length-offset)+"M");
+                    read.setCigarString((readOffset+1)+"M"+ Math.abs(eventLength) + (isBeforeDeletion?"D":"I") +
+                            (readBases.length-readOffset)+"M");
                else // SNP case
                    read.setCigarString(readBases.length+"M");
            }

-            int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
            read.setReadPairedFlag(false);
            read.setAlignmentStart(readStart);
            read.setMappingQuality(artificialMappingQuality);
@ -187,16 +195,25 @@ public class ArtificialReadPileupTestProvider {
            read.setAttribute("RG", sampleRG(sample).getReadGroupId());


-            pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
+            pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength)));
        }

        return pileupElements;
    }

-    private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) {
+    /**
+     * Create haplotype with desired allele and reference context
+     * @param allele                             Desired allele string
+     * @param refAlleleLength                    Length of reference allele.
+     * @param desiredLength                      Desired haplotype length
+     * @return                                   String with haplotype formed by (prefix)+allele bases + postfix
+     */
+    private byte[] trueHaplotype(final String allele, final int refAlleleLength, final int desiredLength) {
        // create haplotype based on a particular allele
-        String prefix = refBases.substring(0, offset);
-        String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
+        final int startIdx= locStart - readOffset-1;
+
+        final String prefix = refBases.substring(startIdx, locStart-1);
+        final String postfix = refBases.substring(locStart+refAlleleLength-1,startIdx + desiredLength);

        return (prefix+allele+postfix).getBytes();
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
@ -45,7 +45,6 @@ import org.testng.annotations.Test;
 */
 public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
    
-    final String refBases = "AGGATACTGT";
    final int nSamples = 1;
    final int[] numReadsPerAllele = new int[]{10,10};
    final String SAMPLE_PREFIX = "sample";
@ -65,7 +64,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
    @Test
    public void testBasicConsensusCounts() {
        // 4 inserted bases, min cnt = 10
-        String altBases = "CCTCCTGAGA";
+        String altBases = "CCTC";
        int eventLength = 4;
        List<Allele> alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
        
@ -73,13 +72,11 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
        Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));


-
-        //altBases = "CCTCMTGAGA";
-
+        // test deletions
        eventLength = 3;
        alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
        Assert.assertEquals(alleles.size(),2);
-        Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
+        Assert.assertEquals(alleles.get(0).getBaseString().substring(1,eventLength), new String(pileupProvider.getReferenceContext().getForwardBases()).substring(1,eventLength));

        // same with min Reads = 11
        alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@ -92,14 +89,14 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
        Assert.assertEquals(alleles.size(),0);

        // test N's in insertions
-        altBases = "CCTCNTGAGA";
+        altBases = "CCTC";
        eventLength = 4;
        alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);

        Assert.assertEquals(alleles.size(),2);
-        Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength));
+        Assert.assertEquals(alleles.get(1).getBaseString().substring(1,eventLength+1), altBases);

-        altBases = "CCTCNTGAGA";
+        altBases = "CCTCN";
        eventLength = 5;
        alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);

--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -373,13 +373,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {

    // --------------------------------------------------------------------------------------------------------------
    //
-    // testing SnpEff
+    // testing MinIndelFraction
    //
    // --------------------------------------------------------------------------------------------------------------

    final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
            + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
-    
+
    @Test
    public void testMinIndelFraction0() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -403,4 +403,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
        executeTest("test minIndelFraction 1.0", spec);
    }
+
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // testing Ns in CIGAR
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    @Test
+    public void testNsInCigar() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
+                Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29"));
+        executeTest("test calling on reads with Ns in CIGAR", spec);
+    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@ -34,7 +34,7 @@ import java.util.Arrays;
 import java.util.List;

 public class VariantEvalIntegrationTest extends WalkerTest {
-    private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
+    private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
    private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
    private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
    private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                        "-o %s"
                ),
                1,
-                Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
+                Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
        );
        executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
    }
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                        "-o %s"
                ),
                1,
-                Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
+                Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
        );
        executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
    }

    VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
-            "62f81e7d2082fbc71cae0101c27fefad",  // tranches
-            "b9709e4180e56abc691b208bd3e8626c",  // recal file
-            "75c178345f70ca2eb90205662fbdf968"); // cut VCF
+            "f360ce3eb2b0b887301be917a9843e2b",  // tranches
+            "287fea5ea066bf3fdd71f5ce9b58eab3",  // recal file
+            "356b9570817b9389da71fbe991d8b2f5"); // cut VCF

    @DataProvider(name = "VRTest")
    public Object[][] createData1() {
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java
@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
    public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
        final String collapsed = BCF2Utils.collapseStringList(strings);
        Assert.assertEquals(collapsed, expected);
-        Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
+        Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
    }

    // -----------------------------------------------------------------
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
        String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";

        String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
-        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
+        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
        executeTest("Test reading and writing 1000G Phase I SVs", spec1);
    }

@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
        String testVCF = privateTestDir + "ex2.vcf";
        String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
        String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
-        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
+        WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
        executeTest("Test writing samtools WEx BCF example", spec1);
    }

--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java
@ -1,5 +1,8 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest {
        requestedCovariates[2] = coCov;
        requestedCovariates[3] = cyCov;

-        ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
+        ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);

        // check that the length is correct
        Assert.assertEquals(rc.getMismatchesKeySet().length, length);
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java
@ -1,5 +1,7 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java
@ -1,9 +1,10 @@
-package org.broadinstitute.sting.gatk.walkers.bqsr;
+package org.broadinstitute.sting.utils.recalibration;

+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.covariates.*;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest {

        final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
        int nKeys = 0;                                                                                                  // keep track of how many keys were produced
-        final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
+        final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);

        final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
        final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java
@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
                    public void run(final VariantContext vc) {
                        if ( samples == null )
                            samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
-                        VariantContext sub = vc.subContextFromSamples(samples, true);
+                        VariantContext sub = vc.subContextFromSamples(samples);
                        sub.getNSamples();
                    }
                };
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
@ -347,6 +347,7 @@ class DataProcessingPipeline extends QScript {
    this.knownSites ++= qscript.dbSNP
    this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
    this.input_file :+= inBam
+    this.disable_indel_quals = true
    this.out = outRecalFile
    if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
    if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
@ -359,7 +360,6 @@ class DataProcessingPipeline extends QScript {
  case class recal (inBam: File, inRecalFile: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
    this.input_file :+= inBam
    this.BQSR = inRecalFile
-    this.disable_indel_quals = true
    this.baq = CalculationMode.CALCULATE_AS_NECESSARY
    this.out = outBam
    if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
@ -166,6 +166,7 @@ class PacbioProcessingPipeline extends QScript {
    this.knownSites :+= dbSNP
    this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "ContextCovariate")
    this.input_file :+= inBam
+    this.disable_indel_quals = true
    this.out = outRecalFile
    this.analysisName = queueLogDir + outRecalFile + ".covariates"
    this.jobName = queueLogDir + outRecalFile + ".covariates"
@ -178,7 +179,6 @@ class PacbioProcessingPipeline extends QScript {
    this.input_file :+= inBam
    this.BQSR = inRecalFile
    this.out = outBam
-    this.disable_indel_quals = true
    this.isIntermediate = false
    this.analysisName = queueLogDir + outBam + ".recalibration"
    this.jobName = queueLogDir + outBam + ".recalibration"
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
@ -41,7 +41,7 @@ class DataProcessingPipelineTest {
      " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
      " -test ",
      " -p " + projectName).mkString
-    spec.fileMD5s += testOut -> "0f0b32e94640a8d548b4c1134ad4c075"
+    spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c"
    PipelineTest.executeTest(spec)
  }

@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
      " -bwa /home/unix/carneiro/bin/bwa",
      " -bwape ",
      " -p " + projectName).mkString
-    spec.fileMD5s += testOut -> "6b4f13d22b45d7d617ee959fdc278ed2"
+    spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59"
    PipelineTest.executeTest(spec)
  }

--- a/Show More
+++ b/Show More