Merge remote-tracking branch 'unstable/master'

This commit is contained in:
Eric Banks 2012-08-20 21:18:54 -04:00
commit 5b1781fdac
272 changed files with 5955 additions and 3968 deletions

View File

@ -76,7 +76,8 @@
<property name="staging.dir" value="staging" />
<property name="default.executable" value="none" />
<!-- Javadoc/Scaladoc directories -->
<!-- GATKDocs/Javadoc/Scaladoc directories -->
<property name="gatkdocs.dir" value="gatkdocs" />
<property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" />
@ -92,12 +93,10 @@
<!-- To disable for test targets, run with -Duse.contracts=false -->
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
<property name="java.contracts.dir" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-20110609" />
<property name="contracts.version" value="1.0-r139" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
<property name="contract.dump.dir" value="dump" />
<property name="gatkdocs.dir" value="gatkdocs" />
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" />
@ -208,19 +207,19 @@
<include name="**/*.java" />
</fileset>
<pathconvert property="external.build.dir">
<path path="${java.classes}"/>
</pathconvert>
<path id="external.build.dir">
<path path="${java.classes}" />
</path>
<pathconvert property="external.dist.dir">
<path id="external.dist.dir">
<path path="${dist.dir}" />
</pathconvert>
</path>
<!-- GATK dependencies consist of 3rd party plugins plus compiled GATK classes -->
<pathconvert property="external.gatk.classpath">
<path id="external.gatk.classpath">
<path path="${java.classes}"/>
<path refid="external.dependencies" />
</pathconvert>
</path>
<!-- the path for resources that need to go into the GATK jar;
any additional resources should go into this set. -->
@ -430,9 +429,9 @@
<target name="gatk.compile.external.source" depends="gatk.compile.internal.source" if="include.external">
<subant target="compile" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" />
<property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</target>
@ -680,9 +679,9 @@
</jar>
<subant target="dist" genericantfile="build.xml">
<property name="build.dir" value="${external.build.dir}" />
<property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" />
<property name="build.dir" refid="external.build.dir" />
<property name="dist.dir" refid="external.dist.dir" />
<property name="gatk.classpath" refid="external.gatk.classpath" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</target>

View File

@ -52,7 +52,7 @@
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<dependency org="net.sf.jgrapht" name="jgrapht" rev="0.8.3"/>
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
@ -87,7 +87,7 @@
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
<!-- Contracts for Java and dependencies -->
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-20110609"/>
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
<dependency org="asm" name="asm-all" rev="3.3.1"/>
<!-- POI, for reading pipeline files -->

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {

View File

@ -5,7 +5,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
/**
* Created by IntelliJ IDEA.
@ -30,24 +35,26 @@ public class ErrorModel {
private static final boolean compressRange = false;
private static final double log10MinusE = Math.log10(Math.exp(1.0));
private static final boolean DEBUG = false;
/**
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
*
* @param minQualityScore Minimum site quality score to evaluate
* @param maxQualityScore Maximum site quality score to evaluate
* @param phredScaledPrior Prior for site quality
* @param UAC Argument Collection
* @param refSamplePileup Reference sample pileup
* @param refSampleVC VC with True alleles in reference sample pileup
* @param minPower Minimum power
*/
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior,
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) {
this.maxQualityScore = maxQualityScore;
this.minQualityScore = minQualityScore;
this.phredScaledPrior = phredScaledPrior;
log10minPower = Math.log10(minPower);
public ErrorModel (final UnifiedArgumentCollection UAC,
final ReadBackedPileup refSamplePileup,
VariantContext refSampleVC, final ReferenceContext refContext) {
this.maxQualityScore = UAC.maxQualityScore;
this.minQualityScore = UAC.minQualityScore;
this.phredScaledPrior = UAC.phredScaledPrior;
log10minPower = Math.log10(UAC.minPower);
PairHMMIndelErrorModel pairModel = null;
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
double[][] perReadLikelihoods = null;
double[] model = new double[maxQualityScore+1];
Arrays.fill(model,Double.NEGATIVE_INFINITY);
@ -61,11 +68,17 @@ public class ErrorModel {
break;
}
}
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
if (refSampleVC.isIndel()) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
}
}
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
// maximum uncertainty if there's no ref data at site
model[q] = p;
@ -75,23 +88,48 @@ public class ErrorModel {
else {
hasData = true;
int matches = 0;
int coverage = refSamplePileup.getNumberOfElements();
int coverage = 0;
Allele refAllele = refSampleVC.getReference();
if (refSampleVC.isIndel()) {
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
if (!haplotypeMap.isEmpty())
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
}
int idx = 0;
for (PileupElement refPileupElement : refSamplePileup) {
if (DEBUG)
System.out.println(refPileupElement.toString());
boolean isMatch = false;
for (Allele allele : refSampleVC.getAlleles())
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele);
for (Allele allele : refSampleVC.getAlleles()) {
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
if (DEBUG) System.out.println(m);
isMatch |= m;
}
if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) {
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
if (!isInformativeElement(perAlleleLikelihoods))
matches++;
else
matches += (isMatch?1:0);
matches += (isMatch?1:0);
// System.out.format("MATCH:%b\n",isMatch);
} else {
matches += (isMatch?1:0);
}
coverage++;
}
int mismatches = coverage - matches;
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
if (coverage==0)
model[q] = p;
else
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
}
this.refDepth = coverage;
}
@ -101,6 +139,17 @@ public class ErrorModel {
}
@Requires("likelihoods.length>0")
private boolean isInformativeElement(double[] likelihoods) {
// if likelihoods are the same, they're not informative
final double thresh = 0.1;
int maxIdx = MathUtils.maxElementIndex(likelihoods);
int minIdx = MathUtils.minElementIndex(likelihoods);
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
return false;
else
return true;
}
/**
* Simple constructor that just takes a given log-probability vector as error model.
* Only intended for unit testing, not general usage.
@ -115,23 +164,27 @@ public class ErrorModel {
}
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) {
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
if (DEBUG)
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
*/
//pileupElement.
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup counts as a mismatch
if (allele.isReference()) {
// for a ref allele, any base mismatch or new indel is a mismatch.
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]);
if(allele.getBases().length>0)
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
// could clean up/simplify this when unpadding is removed
return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
else
// either null allele to compare, or ref/alt lengths are different (indel by definition).
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
}
// for non-ref alleles to compare:
if (refAllele.getBases().length == allele.getBases().length)
// alleles have the same length (eg snp or mnp)
return pileupElement.getBase() == allele.getBases()[0];
@ -209,18 +262,19 @@ public class ErrorModel {
}
public String toString() {
String result = "(";
StringBuilder result = new StringBuilder("(");
boolean skipComma = true;
for (double v : probabilityVector.getProbabilityVector()) {
if (skipComma) {
skipComma = false;
}
else {
result += ",";
result.append(",");
}
result += String.format("%.4f", v);
result.append(String.format("%.4f", v));
}
return result + ")";
result.append(")");
return result.toString();
}
public static int getTotalReferenceDepth(HashMap<String, ErrorModel> perLaneErrorModels) {

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy;
this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool;
}
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
@ -681,8 +681,8 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
}
// per-pool logging of AC and AF
gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
// remove PLs if necessary
if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING)

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
public abstract class PoolGenotypeLikelihoods {
public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles;
this.nAlleles = alleles.size();
numChromosomes = ploidy;
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else {
if (logLikelihoods.length != likelihoodDim)
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
}
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length;
final int ploidy = restrictSumTo;
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
}
else
throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize];
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl;
if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec));

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set<String> laneIDs;
public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC;
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger);
this.UAC = UAC;
@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(),
referenceSampleVC.getAlleles())
.referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel())
.genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make())
.make();
}
@ -138,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData {
public final String name;
public final PoolGenotypeLikelihoods GL;
public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth;
public final List<Allele> alleles;
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
this.name = name;
this.GL = GL;
this.depth = depth;
@ -237,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 )
@ -247,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
// find the alternate allele(s) that we should be using
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
if (alleles == null || alleles.isEmpty())
return null;
// start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final int endLoc = getEndLocation(tracker, ref, alleles);
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles);
// normalize in log space so that max element is zero.
@ -313,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
refLanePileup = refPileup.getPileupForLane(laneID);
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower));
perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
}
return perLaneErrorModels;
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes
*/
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,

View File

@ -18,26 +18,30 @@ import java.util.*;
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel;
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
final ReferenceContext refContext;
final int eventLength;
double[][] readHaplotypeLikelihoods;
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
final byte refBase;
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;
this.refContext = referenceContext;
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
// todo - not needed if indel alleles have base at current position
this.refBase = referenceContext.getBase();
}
// -------------------------------------------------------------------------------------
@ -138,10 +142,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
if (!hasReferenceSampleData) {
final int numHaplotypes = haplotypeMap.size();
final int readCounts[] = new int[pileup.getNumberOfElements()];
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
n = readHaplotypeLikelihoods.length;
} else {
Allele refAllele = null;
@ -161,7 +163,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
int idx =0;
for (Allele allele : alleles) {
int cnt = numSeenBases.get(idx);
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0));
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
}
n++;

View File

@ -32,17 +32,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded = false;
/*
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
@ -60,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
*/
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger);
@ -70,20 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation){
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
@ -94,12 +85,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
final List<Allele> allAllelesToUse){
final Pair<List<Allele>,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
List<Allele> alleles = pair.first;
List<Allele> alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true);
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
allelesArePadded = pair.second;
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
haplotypeMap.clear();
@ -131,6 +120,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
protected int getEndLocation(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final List<Allele> allelesToUse) {
return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded);
return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1;
}
}

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -22,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores
*
*/
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List<Allele> myAlleles;
final int[] alleleIndices;
@ -41,14 +42,19 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored
*/
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup;
myAlleles = new ArrayList<Allele>(alleles);
refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref!
Allele refAllele = alleles.get(0);
//sanity check: by construction, first allele should ALWAYS be the reference alleles
if (!refAllele.isReference())
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!
if (myAlleles.size() < BaseUtils.BASES.length) {
// likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones,

View File

@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -57,8 +56,12 @@ public class GenotypingEngine {
// This function is the streamlined approach, currently not being used
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser ) {
// Prepare the list of haplotype indices to genotype
final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
@ -184,8 +187,13 @@ public class GenotypingEngine {
}
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final ArrayList<VariantContext> activeAllelesToGenotype ) {
final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
@ -220,7 +228,6 @@ public class GenotypingEngine {
}
}
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
@ -317,7 +324,7 @@ public class GenotypingEngine {
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 10;
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }
@ -334,10 +341,10 @@ public class GenotypingEngine {
boolean isBiallelic = true;
VariantContext thisVC = null;
VariantContext nextVC = null;
int x11 = 0;
int x12 = 0;
int x21 = 0;
int x22 = 0;
double x11 = Double.NEGATIVE_INFINITY;
double x12 = Double.NEGATIVE_INFINITY;
double x21 = Double.NEGATIVE_INFINITY;
double x22 = Double.NEGATIVE_INFINITY;
for( final Haplotype h : haplotypes ) {
// only make complex substitutions out of consecutive biallelic sites
@ -360,21 +367,24 @@ public class GenotypingEngine {
}
}
// count up the co-occurrences of the events for the R^2 calculation
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11++; }
else { x12++; }
} else {
if( nextHapVC == null ) { x21++; }
else { x22++; }
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
} else {
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
}
}
}
if( thisVC == null || nextVC == null ) {
continue;
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
}
if( isBiallelic ) {
final double R2 = calculateR2LD( x11, x12, x21, x22 );
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC);
@ -419,24 +429,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} );
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
if( nextVC.hasReferenceBaseForIndel() ) {
refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel());
altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel());
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;
if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -445,12 +452,11 @@ public class GenotypingEngine {
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
}
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
final int total = x11 + x12 + x21 + x22;
final double pa1b1 = ((double) x11) / ((double) total);
final double pa1b2 = ((double) x12) / ((double) total);
final double pa2b1 = ((double) x21) / ((double) total);
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
final double total = x11 + x12 + x21 + x22;
final double pa1b1 = x11 / total;
final double pa1b2 = x12 / total;
final double pa2b1 = x21 / total;
final double pa1 = pa1b1 + pa1b2;
final double pb1 = pa1b1 + pa2b1;
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
@ -530,35 +536,37 @@ public class GenotypingEngine {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
boolean allN = true;
for( final byte b : insertionBases ) {
if( b != (byte) 'N' ) {
allN = false;
break;
}
{
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
insertionAlleles.add( Allele.create(refByte, true) );
}
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
if( BaseUtils.isAllRegularBases(insertionBases) ) {
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
}
}
if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
alignmentPos += elementLength;
break;
}
case S:
{
alignmentPos += elementLength;
break;
}
case D:
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
{
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
@ -568,13 +576,20 @@ public class GenotypingEngine {
// deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
// vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
//} else {
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) {
deletionAlleles.add( Allele.create(deletionBases, true) );
deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
deletionAlleles.add( Allele.create(refByte, false) );
vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make());
}
//}
refPos += elementLength;
break;
}
case M:
case EQ:
case X:
{
int numSinceMismatch = -1;
int stopOfMismatch = -1;
int startOfMismatch = -1;
@ -597,11 +612,13 @@ public class GenotypingEngine {
if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refBases, true ) );
snpAlleles.add( Allele.create( mismatchBases, false ) );
final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
}
numSinceMismatch = -1;
stopOfMismatch = -1;
startOfMismatch = -1;
@ -611,6 +628,7 @@ public class GenotypingEngine {
alignmentPos++;
}
break;
}
case N:
case H:
case P:

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -56,6 +58,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -103,7 +106,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=275)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@ -126,9 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@Advanced
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@Advanced
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@ -139,9 +144,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
protected boolean USE_EXPANDED_TRIGGER_SET = false;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false;
@ -188,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected String[] annotationClassesToUse = { "Standard" };
@ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection();
// the calculation arguments
private UnifiedGenotyperEngine UG_engine = null;
@ -239,12 +241,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING);
UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header
@ -308,8 +310,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
public boolean wantsNonPrimaryReads() { return true; }
@Override
@Ensures({"result >= 0.0", "result <= 1.0"})
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
@ -318,56 +320,52 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return 1.0;
return new ActivityProfileResult(1.0);
}
}
if( USE_ALLELES_TRIGGER ) {
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
}
if( context == null ) { return 0.0; }
if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
for( final String sample : splitContexts.keySet() ) {
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0);
for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) {
final byte qual = ( USE_EXPANDED_TRIGGER_SET ?
( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() )
: p.getQual() );
if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) {
for( final PileupElement p : sample.getValue().getBasePileup() ) {
final byte qual = p.getQual();
if( p.isDeletion() || qual > (byte) 18) {
int AA = 0; final int AB = 1; int BB = 2;
if( USE_EXPANDED_TRIGGER_SET ) {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ||
(!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) {
AA = 2;
BB = 0;
}
} else {
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
}
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual);
genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
}
}
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
}
//---------------------------------------------------------------------------------------------------------------
@ -416,45 +414,48 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
// add some custom annotations to the calls
final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
if( !GENOTYPE_FULL_ACTIVE_REGION ) {
// add some custom annotations to the calls
// Calculate the number of variants on the haplotype
int maxNumVar = 0;
for( final Allele allele : callResult.getFirst().getAlleles() ) {
if( !allele.isReference() ) {
for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
final int numVar = haplotype.getEventMap().size();
if( numVar > maxNumVar ) { maxNumVar = numVar; }
}
}
}
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
// Calculate the event length
int maxLength = 0;
for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
final int length = a.length() - annotatedCall.getReference().length();
if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
}
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
myAttributes.put("NVH", maxNumVar);
myAttributes.put("NumHapEval", bestHaplotypes.size());
myAttributes.put("NumHapAssembly", haplotypes.size());
myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
myAttributes.put("EVENTLENGTH", maxLength);
myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
myAttributes.put("extType", annotatedCall.getType().toString() );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
//if( likelihoodCalculationEngine.haplotypeScore != null ) {
// myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
//}
if( annotatedCall.hasAttribute("QD") ) {
myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
}
}
vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
@ -493,7 +494,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
//---------------------------------------------------------------------------------------------------------------
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
activeRegion.clearReads();
@ -522,7 +523,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
}
}

View File

@ -82,11 +82,11 @@ public class KBestPaths {
}
}
protected static class PathComparatorLowestEdge implements Comparator<Path> {
public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge;
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -99,7 +99,7 @@ public class KBestPaths {
}
}
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
Collections.sort(bestPaths, new PathComparatorTotalScore() );
Collections.reverse(bestPaths);
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
@ -114,8 +114,8 @@ public class KBestPaths {
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
Collections.sort(bestPaths, new PathComparatorTotalScore() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
}
bestPaths.add(path);
} else if( n.val > 10000) {

View File

@ -30,6 +30,7 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -50,18 +51,17 @@ public class LikelihoodCalculationEngine {
}
public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
final int numHaplotypes = haplotypes.size();
int X_METRIC_LENGTH = 0;
for( final String sample : perSampleReadList.keySet() ) {
for( final GATKSAMRecord read : perSampleReadList.get(sample) ) {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : sample.getValue() ) {
final int readLength = read.getReadLength();
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
}
}
int Y_METRIC_LENGTH = 0;
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
final int haplotypeLength = haplotypes.get(jjj).getBases().length;
for( final Haplotype h : haplotypes ) {
final int haplotypeLength = h.getBases().length;
if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
}
@ -90,8 +90,10 @@ public class LikelihoodCalculationEngine {
final int numHaplotypes = haplotypes.size();
final int numReads = reads.size();
final double[][] readLikelihoods = new double[numHaplotypes][numReads];
final int[][] readCounts = new int[numHaplotypes][numReads];
for( int iii = 0; iii < numReads; iii++ ) {
final GATKSAMRecord read = reads.get(iii);
final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
final byte[] overallGCP = new byte[read.getReadLength()];
Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
@ -103,7 +105,7 @@ public class LikelihoodCalculationEngine {
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
@ -114,10 +116,11 @@ public class LikelihoodCalculationEngine {
readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
readQuals, readInsQuals, readDelQuals, overallGCP,
haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
readCounts[jjj][iii] = readCount;
}
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] );
haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
}
}
@ -142,10 +145,20 @@ public class LikelihoodCalculationEngine {
}
return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
}
// This function takes just a single sample and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final TreeSet<String> sampleSet = new TreeSet<String>();
sampleSet.add(sample);
return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
}
// This function takes a set of samples to pool over and a haplotypeMapping
@Requires({"haplotypeMapping.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
@ -154,17 +167,21 @@ public class LikelihoodCalculationEngine {
}
// compute the diploid haplotype likelihoods
// todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
double haplotypeLikelihood = 0.0;
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
}
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
}
@ -176,48 +193,6 @@ public class LikelihoodCalculationEngine {
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final Set<String> samples ) {
// set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final int numHaplotypes = haplotypeMapping.size();
final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
for( int iii = 0; iii < numHaplotypes; iii++ ) {
Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
}
// compute the diploid haplotype likelihoods
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
double haplotypeLikelihood = 0.0;
for( final String sample : samples ) {
final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
}
}
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
}
}
}
}
// normalize the diploid likelihoods matrix
return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
}
@Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
@Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
@ -306,12 +281,19 @@ public class LikelihoodCalculationEngine {
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add(0); // always start with the reference haplotype
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together
// set up the default 1-to-1 haplotype mapping object
final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
for( final Haplotype h : haplotypes ) {
final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
list.add(h);
haplotypeMapping.add(list);
}
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
@ -343,9 +325,9 @@ public class LikelihoodCalculationEngine {
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
for( final String sample : perSampleReadList.keySet() ) {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
final ArrayList<GATKSAMRecord> readsForThisSample = perSampleReadList.get(sample);
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
@ -355,7 +337,7 @@ public class LikelihoodCalculationEngine {
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
double maxLikelihood = Double.NEGATIVE_INFINITY;
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
final double likelihood = h.getReadLikelihoods(sample)[iii];
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
if( likelihood > maxLikelihood ) {
maxLikelihood = likelihood;
}
@ -390,13 +372,13 @@ public class LikelihoodCalculationEngine {
readList = new ArrayList<GATKSAMRecord>();
alleleReadMap.put(Allele.NO_CALL, readList);
}
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) {
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
readList.add(read);
}
}
returnMap.put(sample, alleleReadMap);
returnMap.put(sample.getKey(), alleleReadMap);
}
return returnMap;
}

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -68,7 +69,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
private void createDeBruijnGraphs( final ArrayList<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
graphs.clear();
// create the graph
@ -161,7 +162,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
}
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final ArrayList<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Collection<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
final byte[] refSequence = refHaplotype.getBases();
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
@ -183,6 +184,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final GATKSAMRecord read : reads ) {
final byte[] sequence = read.getReadBases();
final byte[] qualities = read.getBaseQualities();
final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced
if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {
@ -194,6 +196,14 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
break;
}
}
int countNumber = 1;
if (read.isReducedRead()) {
// compute mean number of reduced read counts in current kmer span
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
// precise rounding can make a difference with low consensus counts
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
}
if( !badKmer ) {
// get the kmers
final byte[] kmer1 = new byte[KMER_LENGTH];
@ -201,7 +211,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final byte[] kmer2 = new byte[KMER_LENGTH];
System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
addKmersToGraph(graph, kmer1, kmer2, false);
for (int k=0; k < countNumber; k++)
addKmersToGraph(graph, kmer1, kmer2, false);
}
}
}
@ -230,7 +241,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return true;
}
private void printGraphs() {
protected void printGraphs() {
int count = 0;
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
GRAPH_WRITER.println("digraph kmer" + count++ +" {");
@ -281,7 +292,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -311,7 +322,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -50,20 +50,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")},
};
}
@ -74,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest {
Arrays.asList(params.md5));
executeTest("testBQSR-"+params.args, spec).getFirst();
WalkerTestSpec specNT2 = new WalkerTestSpec(
params.getCommandLine() + " -nt 2",
Arrays.asList(params.md5));
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
// TODO -- re-enable once parallelization is fixed in BaseRecalibrator
//WalkerTestSpec specNT2 = new WalkerTestSpec(
// params.getCommandLine() + " -nt 2",
// Arrays.asList(params.md5));
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
}
@Test
@ -94,6 +96,20 @@ public class BQSRIntegrationTest extends WalkerTest {
executeTest("testBQSRFailWithoutDBSNP", spec);
}
@Test
public void testBQSRFailWithSolidNoCall() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --no_plots" +
" -o %s",
1, // just one output file
UserException.class);
executeTest("testBQSRFailWithSolidNoCall", spec);
}
private static class PRTest {
final String args;
final String md5;

View File

@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolAFCalculationModelUnitTest extends BaseTest {
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));

View File

@ -41,7 +41,7 @@ import java.io.PrintStream;
import java.util.*;
public class PoolGenotypeLikelihoodsUnitTest {
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
@ -49,11 +49,18 @@ public class PoolGenotypeLikelihoodsUnitTest {
private static final boolean SIMULATE_NOISY_PILEUP = false;
private static final int NUM_SIMULATED_OBS = 10;
void PoolGenotypeLikelihoodsUnitTest() {
UAC.minQualityScore = 5;
UAC.maxQualityScore = 40;
UAC.phredScaledPrior = (byte)20;
UAC.minPower = 0.0;
}
@Test
public void testStoringLikelihoodElements() {
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20;
int numAlleles = 4;
@ -71,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++)
gls[k]= (double)k;
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew);
@ -83,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
}
@ -95,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4;
int ploidy = 2;
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex());
@ -104,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa);
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx);
iterator.next();
}
@ -133,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false));
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset);
@ -163,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4};
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@ -221,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector();
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++)
@ -251,8 +258,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
@ -270,7 +275,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -287,19 +292,17 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndelErrorModel() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
trueAlleles.add(Allele.create("TC", false));
trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create((char)refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000};
@ -311,9 +314,9 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get artificial alignment context for ref sample - no noise
// CASE 1: Test HET insertion
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(1+altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -329,12 +332,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC
final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) {
for (int mismatches: mismatchArray) {
@ -343,7 +346,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0);
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
@ -388,8 +391,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte refByte = readPileupTestProvider.getRefByte();
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte);
final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte);
final List<Allele> allAlleles = new ArrayList<Allele>(); // this contains only ref Allele up to now
final Set<String> laneIDs = new TreeSet<String>();
@ -407,17 +408,28 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (String laneID : laneIDs)
noisyErrorModels.put(laneID, Q30ErrorModel);
// all first ref allele
allAlleles.add(Allele.create(refByte,true));
for (byte b: BaseUtils.BASES) {
if (refByte == b)
allAlleles.add(Allele.create(b,true));
else
if (refByte != b)
allAlleles.add(Allele.create(b, false));
}
final int refIdx = 0;
int altIdx = -1;
for (int k=0; k < allAlleles.size(); k++)
if (altByte == allAlleles.get(k).getBases()[0]) {
altIdx = k;
break;
}
PrintStream out = null;
if (SIMULATE_NOISY_PILEUP) {
try {
out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table"));
out = new PrintStream(new File("GLUnitTest.table"));
// out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table"));
}
catch (Exception e) {}
@ -441,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -470,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount();

View File

@ -12,50 +12,66 @@ import org.testng.annotations.Test;
* Time: 11:28 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolCallerIntegrationTest extends WalkerTest {
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf";
final String REFSAMPLE_NAME = "NA12878";
final String MTINTERVALS = "MT";
final String MTINTERVALS = "MT:1-3000";
final String LSVINTERVALS = "20:40,000,000-41,000,000";
final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf";
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
@Test
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93");
}
@Test
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b");
}
@Test
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459");
}
@Test
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7");
}
}

View File

@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + ref + SNP = MNP with ref base gap
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + SNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// SNP + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion = MNP
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + deletion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// insertion + insertion
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make();
truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + deletion
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make();
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
// deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel());
Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel());
}
/**

View File

@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "882ece8391cfec30ab658970a487d078");
HCTest(CEUTRIO_BAM, "", "6b30c7e1b6bbe80d180d9d67441cec12");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "6f458e04251bc4c09b5b544d46f19d68");
HCTest(NA12878_BAM, "", "4cdfbfeadef00725974828310558d7d4");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "c172791007f867bf3b975d4194564d9e");
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "6183fb6e374976d7087150009685e043");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -41,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "94186811016b332a58c150df556278f8");
HCTestComplexVariants(CEUTRIO_BAM, "", "ab7593a7a60a2e9a66053572f1718df1");
}
}

View File

@ -95,9 +95,10 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
for( int iii = 1; iii <= 3; iii++) {
Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
int readCount = 1;
if( readLikelihood != null ) {
Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood});
haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount});
haplotypes.add(haplotype);
}
}

View File

@ -7,6 +7,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
*/
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -18,6 +20,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
@ -143,6 +146,44 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
Assert.assertTrue(graphEquals(graph, expectedGraph));
}
@Test(enabled=false)
// not ready yet
public void testBasicGraphCreation() {
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
final String altBase = (refBase==(byte)'A'?"C":"A");
final int matches = 50;
final int mismatches = 50;
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
PrintStream graphWriter = null;
try{
graphWriter = new PrintStream("du.txt");
} catch (Exception e) {}
SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
refHaplotype.setIsReference(true);
assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
/* // clean up the graphs by pruning and merging
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
//eliminateNonRefPaths( graph );
SimpleDeBruijnAssembler.mergeNodes( graph );
}
*/
if( graphWriter != null ) {
assembler.printGraphs();
}
int k=2;
// find the best paths in the graphs
// return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
}
@Test(enabled = true)
public void testEliminateNonRefPaths() {
DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);

View File

@ -1,8 +1,18 @@
library("ggplot2")
library(gplots)
library("reshape")
library("grid")
library("tools") #For compactPDF in R 2.13+
library(gsalib)
args <- commandArgs(TRUE)
if ( interactive() ) {
args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA)
} else {
args <- commandArgs(TRUE)
}
data <- read.csv(args[1])
gsa.report <- gsa.read.gatkreport(args[2])
data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType))))
numRG = length(unique(data$ReadGroup))
@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn
p <- ggplot(d, aes(x=CovariateValue)) +
xlab(paste(cov,"Covariate")) +
ylab("Number of Observations") +
ylab("No. of Observations (area normalized)") +
blankTheme
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) +
scale_y_continuous(formatter="comma")
d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity")
d <- d + scale_fill_manual(values=c("maroon1","blue"))
d <- d + facet_grid(.~EventType)
# d <- d + scale_y_continuous(formatter="comma")
}
}
pdf(args[2],height=9,width=15)
if ( ! is.na(args[3]) )
pdf(args[3],height=9,width=15)
#frame()
textplot(gsa.report$Arguments, show.rownames=F)
title(
main="GATK BaseRecalibration report",
sub=date())
distributeGraphRows(list(a,b,c), c(1,1,1))
distributeGraphRows(list(d,e,f), c(1,1,1))
dev.off()
# format the overall information
rt0 <- data.frame(
ReadGroup = gsa.report$RecalTable0$ReadGroup,
EventType = gsa.report$RecalTable0$EventType,
EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality),
EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported),
Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations),
Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors))
textplot(t(rt0), show.colnames=F)
title("Overall error rates by event type")
if (exists('compactPDF')) {
compactPDF(args[2])
# plot per quality score recalibration table
textplot(gsa.report$RecalTable1, show.rownames=F)
title("Error rates by event type and initial quality score")
if ( ! is.na(args[3]) ) {
dev.off()
if (exists('compactPDF')) {
compactPDF(args[2])
}
}

View File

@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
if ( requestedStrat == "Sample" ) {
perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
} else { # by AlleleCount
perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")

View File

@ -208,6 +208,7 @@ public class FastaSequenceIndexBuilder {
break;
}
}
in.close();
return sequenceIndex;
}
catch (IOException e) {

View File

@ -12,10 +12,10 @@ import java.util.*;
*/
public class Bases implements Iterable<Byte>
{
public static byte A = 'A';
public static byte C = 'C';
public static byte G = 'G';
public static byte T = 'T';
public static final byte A = 'A';
public static final byte C = 'C';
public static final byte G = 'G';
public static final byte T = 'T';
public static final Bases instance = new Bases();

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.annotation.Annotation;
import java.util.List;
@ -147,6 +148,9 @@ public class ArgumentDefinition {
this.exclusiveOf = exclusiveOf;
this.validation = validation;
this.validOptions = validOptions;
validateName(shortName);
validateName(fullName);
}
/**
@ -192,6 +196,9 @@ public class ArgumentDefinition {
else
shortName = null;
validateName(shortName);
validateName(fullName);
this.ioType = ioType;
this.argumentType = argumentType;
this.fullName = fullName;
@ -277,4 +284,14 @@ public class ArgumentDefinition {
String validation = (String)CommandLineUtils.getValue(annotation, "validation");
return validation.trim().length() > 0 ? validation.trim() : null;
}
    /**
     * Make sure the argument's name is valid.
     *
     * Names beginning with a dash are rejected because dashes introduce arguments
     * on the command line, so such a name could never be matched by the parser.
     * A null name is accepted (short/full names are optional).
     *
     * @param name the short or full argument name to validate; may be null
     * @throws ReviewedStingException if the name begins with a "-"
     */
    private void validateName(final String name) {
        if ( name != null && name.startsWith("-") )
            throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
    }
}

View File

@ -55,10 +55,8 @@ public class ArgumentDefinitionGroup implements Iterable<ArgumentDefinition> {
* Does the name of this argument group match the name of another?
*/
public boolean groupNameMatches( ArgumentDefinitionGroup other ) {
if( this.groupName == null && other.groupName == null )
return true;
if( this.groupName == null && other.groupName != null )
return false;
if( this.groupName == null )
return other.groupName == null;
return this.groupName.equals(other.groupName);
}

View File

@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor {
/**
* our log, which we want to capture anything from org.broadinstitute.sting
*/
protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class);
/**
* Fetch the given descriptor from the descriptor repository.

View File

@ -120,8 +120,8 @@ public abstract class ParsingMethod {
*/
private static final String TAG_TEXT = "[\\w\\-\\.\\=]*";
public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.FullNameDefinitionMatcher) {};
public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)),
ArgumentDefinitions.ShortNameDefinitionMatcher) {};
}

View File

@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) {
if ( getArgumentCollection().gatkKeyFile == null ) {
throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " +
"Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " +
"for more information and instructions on how to obtain a key.");
"Please see " + GATKRunReport.PHONE_HOME_DOCS_URL +
" for more information and instructions on how to obtain a key.");
}
else {
PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey();

View File

@ -130,6 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}

View File

@ -233,10 +233,6 @@ public class GenomeAnalysisEngine {
if (args.nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis());
// TODO -- REMOVE ME WHEN WE STOP BCF testing
if ( args.USE_SLOW_GENOTYPES )
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
if (args.BQSR_RECAL_FILE != null)
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals);
@ -797,6 +793,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@ -813,7 +817,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities);
argCollection.defaultBaseQualities,
removeProgramRecords);
}
/**
@ -840,20 +845,9 @@ public class GenomeAnalysisEngine {
SAMSequenceDictionary sequenceDictionary,
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
VCFHeader header = null;
if ( getArguments().repairVCFHeader != null ) {
try {
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
pbs.close();
} catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
}
}
final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
builder,

View File

@ -57,8 +57,6 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() {
}
public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>();
@ -66,10 +64,10 @@ public class GATKArgumentCollection {
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false)
@Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false)
public File gatkKeyFile = null;
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@ -249,6 +247,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;
@ -375,19 +379,5 @@ public class GATKArgumentCollection {
@Hidden
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
@Hidden
public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
* to fill in any missing ones in input VCF files.
*/
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
public File repairVCFHeader = null;
}

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
 * A collection of arguments that are common to the various callers.
 * This is pulled out so that every caller isn't exposed to the arguments from every other caller.
 *
 * Author: rpoplin, 8/20/12 (created with IntelliJ IDEA)
 */
public class StandardCallerArgumentCollection {

    /**
     * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
     * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
     */
    @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
    public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;

    /**
     * How the alternate alleles to genotype are determined; defaults to DISCOVERY
     * (alleles found in the reads) as opposed to genotyping a given set of alleles.
     */
    @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
    public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;

    /**
     * Which calls get emitted to the output; defaults to emitting variant sites only.
     */
    @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
    public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;

    /**
     * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
     * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
     * is the default).
     */
    @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
    public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;

    /**
     * This argument allows you to emit low quality calls as filtered records.
     */
    @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
    public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;

    /**
     * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
     */
    @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
    public RodBinding<VariantContext> alleles;

    /**
     * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
     * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
     * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
     * that you not play around with this parameter.
     */
    @Advanced
    @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
    public int MAX_ALTERNATE_ALLELES = 3;
}

View File

@ -118,7 +118,7 @@ class WindowedData {
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null)
if (provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
}

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/**
* Store BAM indices for each reader present.
*/
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1);
(byte) -1,
false);
}
/**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}

View File

@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
@ -56,30 +55,31 @@ public class ReferenceDataSource {
private IndexedFastaSequenceFile reference;
/** our log, which we want to capture anything from this class */
protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
/**
* Create reference data source from fasta file
* @param fastaFile Fasta file to be used as reference
*/
public ReferenceDataSource(File fastaFile) {
// does the fasta file exist? check that first...
if (!fastaFile.exists())
throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
File dictFile;
if (fastaFile.getAbsolutePath().endsWith("fa")) {
dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
}
else
dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
// determine the name for the dict file
final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
/*
if index file does not exist, create it manually
*/
* if index file does not exist, create it manually
*/
if (!indexFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
try {
@ -96,7 +96,7 @@ public class ReferenceDataSource {
}
catch(UserException e) {
// Rethrow all user exceptions as-is; there should be more details in the UserException itself.
throw e;
throw e;
}
catch (Exception e) {
// If lock creation succeeded, the failure must have been generating the index.
@ -115,6 +115,8 @@ public class ReferenceDataSource {
* This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
*/
if (!dictFile.exists()) {
if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
/*
@ -219,9 +221,9 @@ public class ReferenceDataSource {
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
shards.add(new LocusShard(parser,
readsDataSource,
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
null));
readsDataSource,
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
null));
}
}
return shards;

View File

@ -58,7 +58,7 @@ import java.util.Collection;
/** Shards and schedules data in manageable chunks. */
public abstract class MicroScheduler implements MicroSchedulerMBean {
protected static Logger logger = Logger.getLogger(MicroScheduler.class);
protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
/**
* Counts the number of instances of the class that are currently alive.

View File

@ -66,13 +66,13 @@ public class TreeReducer implements Callable {
* @return Result of the reduce.
*/
public Object call() {
Object result = null;
Object result;
final long startTime = System.currentTimeMillis();
try {
if( lhs == null )
result = lhs.get();
result = null;
// todo -- what the hell is this above line? Shouldn't it be the two below?
// if( lhs == null )
// throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) );

View File

@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
/**
* Filter out reads with wonky cigar strings.
*
* - No reads with Hard/Soft clips in the middle of the cigar
* - No reads starting with deletions (with or without preceding clips)
* - No reads ending in deletions (with or without follow-up clips)
* - No reads that are fully hard or soft clipped
* - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
*
* ps: apparently an empty cigar is okay...
*
* @author ebanks
* @version 0.1
*/
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
public boolean filterOut(final SAMRecord rec) {
final Cigar c = rec.getCigar();
if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad
boolean previousElementWasIndel = false;
CigarOperator lastOp = c.getCigarElement(0).getOperator();
if (lastOp == CigarOperator.D) // filter out reads starting with deletion
return true;
for (CigarElement ce : c.getCigarElements()) {
CigarOperator op = ce.getOperator();
if (op == CigarOperator.D || op == CigarOperator.I) {
if (previousElementWasIndel)
return true; // filter out reads with adjacent I/D
previousElementWasIndel = true;
}
else // this is a regular base (match/mismatch/hard or soft clip)
previousElementWasIndel = false; // reset the previous element
lastOp = op;
// if there is no Cigar then it can't be bad
if( c.isEmpty() ) {
return false;
}
return lastOp == CigarOperator.D;
Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
CigarOperator firstOp = CigarOperator.H;
while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
CigarOperator op = elementIterator.next().getOperator();
// No reads with Hard/Soft clips in the middle of the cigar
if (firstOp != CigarOperator.H && op == CigarOperator.H) {
return true;
}
firstOp = op;
}
// No reads starting with deletions (with or without preceding clips)
if (firstOp == CigarOperator.D) {
return true;
}
boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
boolean previousElementWasIndel = firstOp == CigarOperator.I;
CigarOperator lastOp = firstOp;
CigarOperator previousOp = firstOp;
while (elementIterator.hasNext()) {
CigarOperator op = elementIterator.next().getOperator();
if (op != CigarOperator.S && op != CigarOperator.H) {
// No reads with Hard/Soft clips in the middle of the cigar
if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
return true;
lastOp = op;
if (!hasMeaningfulElements && op.consumesReadBases()) {
hasMeaningfulElements = true;
}
if (op == CigarOperator.I || op == CigarOperator.D) {
// No reads that have consecutive indels in the cigar (II, DD, ID or DI)
if (previousElementWasIndel) {
return true;
}
previousElementWasIndel = true;
}
else {
previousElementWasIndel = false;
}
}
// No reads with Hard/Soft clips in the middle of the cigar
else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
return true;
}
previousOp = op;
}
// No reads ending in deletions (with or without follow-up clips)
// No reads that are fully hard or soft clipped
return lastOp == CigarOperator.D || !hasMeaningfulElements;
}
}

View File

@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker {
try {
tempFile = File.createTempFile( stub.getClass().getName(), null );
tempFile.deleteOnExit();
//tempFile.deleteOnExit();
}
catch( IOException ex ) {
throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );

View File

@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
if (stub.getGenerateMD5())
factory.setCreateMd5File(true);
// Adjust max records in RAM.
// TODO -- this doesn't actually work because of a bug in Picard; do not use until fixed
if(stub.getMaxRecordsInRam() != null)
factory.setMaxRecordsInRam(stub.getMaxRecordsInRam());

View File

@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.io.storage;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -60,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
protected final File file;
protected OutputStream stream;
protected final VariantContextWriter writer;
boolean closed = false;
/**
* Constructs an object which will write directly into the output file provided by the stub.
@ -81,6 +83,18 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
}
    /**
     * Constructs an object which will redirect into a different file.
     *
     * Writes the stub's VCF header immediately so the temporary file is a valid
     * variant file from the start.
     *
     * @param stub Stub to use when synthesizing file / header info.
     * @param tempFile File into which to direct the output data.
     */
    public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
        logger.debug("Creating temporary output file " + tempFile.getAbsolutePath() + " for VariantContext output.");
        this.file = tempFile;
        // third argument presumably disables on-the-fly indexing for the temp file -- TODO confirm against vcfWriterToFile
        this.writer = vcfWriterToFile(stub, file, false);
        writer.writeHeader(stub.getVCFHeader());
    }
/**
* common initialization routine for multiple constructors
* @param stub Stub to use when constructing the output file.
@ -139,19 +153,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
}
}
/**
* Constructs an object which will redirect into a different file.
* @param stub Stub to use when synthesizing file / header info.
* @param tempFile File into which to direct the output data.
*/
public VariantContextWriterStorage(VariantContextWriterStub stub, File tempFile) {
logger.debug("Creating temporary VCF file " + tempFile.getAbsolutePath() + " for VCF output.");
this.file = tempFile;
this.writer = vcfWriterToFile(stub, file, false);
writer.writeHeader(stub.getVCFHeader());
}
public void add(VariantContext vc) {
writer.add(vc);
}
@ -172,20 +173,34 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
if(file != null)
logger.debug("Closing temporary file " + file.getAbsolutePath());
writer.close();
closed = true;
}
public void mergeInto(VariantContextWriterStorage target) {
try {
String sourceFilePath = file.getAbsolutePath();
String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",sourceFilePath,targetFilePath));
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
if ( ! closed )
throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
// use the feature manager to determine the right codec for the tmp file
// that way we don't assume it's a specific type
final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file);
if ( fd == null )
throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file);
final FeatureCodec<VariantContext> codec = fd.getCodec();
final AbstractFeatureReader<VariantContext> source =
AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false);
for ( VariantContext vc : source.iterator() ) {
for ( final VariantContext vc : source.iterator() ) {
target.writer.add(vc);
}
source.close();
file.delete(); // this should be last to aid in debugging when the process fails
} catch (UserException e) {
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
}

View File

@ -47,6 +47,7 @@ import java.util.List;
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
//
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition());
return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
}
/**
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
engine.addOutput(stub);
return stub;
}
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if(writerFile == null && !source.isRequired())
throw new MissingArgumentValueException(defaultArgumentDefinition);
// Should we compress the output stream?
boolean compress = isCompressed(writerFileName);
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object.
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes)
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes);
final VariantContextWriterStub stub = (writerFile != null)
? new VariantContextWriterStub(engine, writerFile, argumentSources)
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches));
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
/**
* Creates the optional compression level argument for the BAM file.
* @return Argument definition for the BAM file itself. Will not be null.
* Creates the optional no_header argument for the VCF file.
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
/**
* Creates the optional compression level argument for the BAM file.
* @return Argument definition for the BAM file itself. Will not be null.
* Creates the optional sites_only argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null );
}
/**
* Creates the optional bcf argument definition
* @return Argument definition for the VCF file itself. Will not be null.
*/
private ArgumentDefinition createBCFArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
FORCE_BCF,
FORCE_BCF,
"force BCF output, regardless of the file's extension",
false,
true,
false,
true,
null,
null,
null,
null );
}
/**
* Returns true if the file will be compressed.
* @param writerFileName Name of the file

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.OutputStream;
@ -78,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/**
* Should we emit a compressed output stream?
*/
private final boolean isCompressed;
private boolean isCompressed = false;
/**
* A hack: push the argument sources into the VCF header so that the VCF header
@ -89,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/**
* Should the header be written out? A hidden argument.
*/
private final boolean skipWritingCommandLineHeader;
private boolean skipWritingCommandLineHeader = false;
/**
* Should we not write genotypes even when provided?
*/
private final boolean doNotWriteGenotypes;
private boolean doNotWriteGenotypes = false;
/**
* Should we force BCF writing regardless of the file extension?
*/
private boolean forceBCF = false;
/**
* Connects this stub with an external stream capable of serving the
@ -107,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
*
* @param engine engine.
* @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
this.engine = engine;
this.genotypeFile = genotypeFile;
this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -127,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
*
* @param engine engine.
* @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) {
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
this.engine = engine;
this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
@ -166,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return isCompressed;
}
public void setCompressed(boolean compressed) {
isCompressed = compressed;
}
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
public void setForceBCF(boolean forceBCF) {
this.forceBCF = forceBCF;
}
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
@ -186,6 +196,9 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
}

View File

@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
return stepForwardOnGenome();
} else {
if (curElement != null && curElement.getOperator() == CigarOperator.D)
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// Reads that contain indels model the genomeOffset as the following base in the reference. Because
// we fall into this else block only when indels end the read, increment genomeOffset such that the
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
break;
case D: // deletion w.r.t. the reference
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
// should be the same as N case
genomeOffset++;
done = true;
@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator {
done = true;
break;
case M:
case EQ:
case X:
readOffset++;
genomeOffset++;
done = true;
@ -279,7 +281,6 @@ public class LocusIteratorByState extends LocusIterator {
*/
private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads();
final GenomeLoc location = getLocation();
@ -378,7 +379,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}

View File

@ -86,13 +86,14 @@ public class GATKRunReport {
private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE");
// number of milliseconds before the S3 put operation is timed-out:
private static final long S3PutTimeOut = 30 * 1000;
private static final long S3PutTimeOut = 10 * 1000;
public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest";
/**
* our log
*/
protected static Logger logger = Logger.getLogger(GATKRunReport.class);
protected static final Logger logger = Logger.getLogger(GATKRunReport.class);
@Element(required = false, name = "id")

View File

@ -163,43 +163,58 @@ public class VariantContextAdaptors {
@Override
public VariantContext convert(String name, Object input, ReferenceContext ref) {
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
// add all of the alt alleles
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
Allele altAllele = Allele.create(alt, false);
alleles.add(altAllele);
if ( altAllele.isNull() )
sawNullAllele = true;
}
final byte refBaseForIndel = ref.getBases()[index];
Map<String, Object> attributes = new HashMap<String, Object>();
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0));
builder.alleles(alleles);
builder.referenceBaseForIndel(refBaseForIndel);
return builder.make();
} else
boolean addPaddingBase;
if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else
return null; // can't handle anything else
Allele refAllele;
if ( dbsnp.getNCBIRefBase().equals("-") )
refAllele = Allele.create(refBaseForIndel, true);
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
else
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( Allele.wouldBeNullAllele(alt.getBytes()))
alt = "";
else if ( ! Allele.acceptableAlleleBases(alt) )
return null;
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
}
final VariantContextBuilder builder = new VariantContextBuilder();
builder.source(name).id(dbsnp.getRsID());
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
builder.alleles(alleles);
return builder.make();
}
private static List<String> stripNullDashes(final List<String> alleles) {
final List<String> newAlleles = new ArrayList<String>(alleles.size());
for ( final String allele : alleles ) {
if ( allele.equals("-") )
newAlleles.add("");
else
newAlleles.add(allele);
}
return newAlleles;
}
}
@ -294,7 +309,6 @@ public class VariantContextAdaptors {
int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -351,7 +365,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd();
if ( deletionLength > 0 )
end += deletionLength;
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make();
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
return vc;
}
}

View File

@ -85,18 +85,16 @@ public class FeatureManager {
private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
private final VCFHeader headerForRepairs;
private final boolean lenientVCFProcessing;
/**
* Construct a FeatureManager without a master VCF header
*/
public FeatureManager() {
this(null, false);
this(false);
}
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
this.headerForRepairs = headerForRepairs;
public FeatureManager(final boolean lenientVCFProcessing) {
this.lenientVCFProcessing = lenientVCFProcessing;
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
@ -255,8 +253,6 @@ public class FeatureManager {
((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
if ( codex instanceof VCFCodec )
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
((AbstractVCFCodec)codex).disableOnTheFlyModifications();

View File

@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* please talk through your approach with the SE team.
* @param dict Sequence dictionary to use.
* @param genomeLocParser Location parser to use.
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
final VCFHeader headerForRepairs,
ValidationExclusion.TYPE validationExclusionType) {
this.dict = dict;
this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser;
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
}
/**
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
return featureManager;
}
/**
* Same as full constructor but makes one without a header for repairs
* @param dict
* @param genomeLocParser
* @param validationExclusionType
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
this(dict, genomeLocParser, null, validationExclusionType);
}
/**
* create a RMDTrack of the specified type
*

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
throw new UserException.CouldNotReadInputFile(file, e);
}

View File

@ -208,11 +208,23 @@ public class GATKReportTable {
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
* Create a new GATKReportTable with the same structure
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported");
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param name the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
Matcher m = p.matcher(name);
@ -490,6 +502,17 @@ public class GATKReportTable {
return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*
* @param rowIndex the row ID
* @param columnName the name of the column
* @return the value stored at the specified position in the table
*/
public Object get(final int rowIndex, final String columnName) {
return get(rowIndex, columnNameToIndex.get(columnName));
}
/**
* Get a value from the given position in the table
*

View File

@ -62,54 +62,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
}
/**
* Simple utility class that makes it convenient to print unit adjusted times
*/
private static class MyTime {
double t; // in Seconds
int precision; // for format
public MyTime(double t, int precision) {
this.t = t;
this.precision = precision;
}
public MyTime(double t) {
this(t, 1);
}
/**
* Instead of 10000 s, returns 2.8 hours
* @return
*/
public String toString() {
double unitTime = t;
String unit = "s";
if ( t > 120 ) {
unitTime = t / 60; // minutes
unit = "m";
if ( unitTime > 120 ) {
unitTime /= 60; // hours
unit = "h";
if ( unitTime > 100 ) {
unitTime /= 24; // days
unit = "d";
if ( unitTime > 20 ) {
unitTime /= 7; // days
unit = "w";
}
}
}
}
return String.format("%6."+precision+"f %s", unitTime, unit);
}
}
/** lock object to sure updates to history are consistent across threads */
private static final Object lock = new Object();
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@ -140,7 +92,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
GenomeLocSortedSet targetIntervals = null;
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
protected GenomeAnalysisEngine engine;
@ -280,20 +232,20 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
final MyTime elapsed = new MyTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
if ( printProgress ) {
lastProgressPrintTime = curTime;
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
@ -308,8 +260,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
lastPerformanceLogPrintTime = curTime;
synchronized(performanceLogLock) {
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed,
bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(),
timeToCompletion.getTimeInSeconds());
}
}
}
@ -401,7 +354,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
synchronized(performanceLogLock) {
// Ignore multiple calls to reset the same lock.
if(performanceLogFile != null && performanceLogFile.equals(fileName))
if(performanceLogFile != null && performanceLogFile.equals(file))
return;
// Close an existing log

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -28,7 +29,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
profile.add(fakeLoc, isActiveProb);
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
}
}
}
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
profile.add(location, isActiveProb);
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
}
// Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
//
// --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}

View File

@ -19,7 +19,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
/**
* our log, which we want to capture anything from this class
*/
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
@Override
protected String getTraversalType() {

View File

@ -24,7 +24,7 @@ import java.util.List;
public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReadPairs.class);
protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class);
@Override
protected String getTraversalType() {

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraverseReads.class);
protected static final Logger logger = Logger.getLogger(TraverseReads.class);
@Override
protected String getTraversalType() {
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
ReadView reads = new ReadView(dataProvider);
ReadReferenceView reference = new ReadReferenceView(dataProvider);
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
ReferenceContext refContext = null;
// get the array of characters for the reference sequence, since we're a mapped read
if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
refContext = reference.getReferenceContext(read);
// update the number of reads we've seen

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -27,10 +28,11 @@ import java.util.List;
*/
@By(DataSource.READS)
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
@ -72,7 +74,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
}
// Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);

View File

@ -574,7 +574,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
}
}
public class ReadClipperWithData extends ReadClipper {
public static class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {

View File

@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
* Allow user to choose between a number of different data sources.
*/
public enum DataSource {
/**
* Does this walker require read (BAM) data to work?
*/
READS,
/**
* Does this walker require reference data to work?
*/
REFERENCE,
REFERENCE_BASES, // Do I actually need the reference bases passed to the walker?
/**
* Does this walker require reference order data (VCF) to work?
*/
REFERENCE_ORDERED_DATA
}

View File

@ -16,9 +16,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -64,9 +64,17 @@ import java.util.List;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
@Output
PrintStream out;
/**
* In addition to the standard pileup output, adds 'verbose' output too. The verbose output contains the number of spanning deletions,
* and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per
* read items are delimited with an '@' character.
*/
@Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
public boolean SHOW_VERBOSE = false;
@ -116,8 +124,6 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
return rodString;
}
private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
private static String createVerboseOutput(final ReadBackedPileup pileup) {
final StringBuilder sb = new StringBuilder();
boolean isFirst = true;

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* Time: 2:52:28 PM
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; }

View File

@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@Requires({DataSource.REFERENCE})
@Allows(DataSource.REFERENCE)
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
}

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -65,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
// by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (entry.getKey().isReference())
refLikelihood = entry.getValue();
else {
double like = el.get(a);
double like = entry.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -22,19 +22,10 @@ import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL.
*
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
* While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
* N samples with -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {

View File

@ -24,10 +24,10 @@ import java.util.List;
/**
* The depth of coverage of each VCF allele in this sample.
*
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
* sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
* Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
* REF and ALT fields) is the unfiltered count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
@ -35,17 +35,13 @@ import java.util.List;
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* are unambiguously informative about the alternate allele). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
* determine the genotype calls.
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static final String REF_ALLELE = "REF";
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
if ( g == null || !g.isCalled() )
return;
@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
if ( vc.isSNP() )
annotateSNP(stratifiedContext, vc, gb);
else if ( vc.isIndel() )
annotateIndel(stratifiedContext, vc, gb);
annotateIndel(stratifiedContext, ref.getBase(), vc, gb);
}
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) {
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlleles() )
@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
gb.AD(counts);
}
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) {
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
if ( pileup == null )
return;
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
alleleCounts.put(REF_ALLELE, 0);
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
final Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) {
final String b = p.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
}
} else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC
final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(deletion) ) {
alleleCounts.put(deletion, alleleCounts.get(deletion)+1);
}
}
} else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) {
alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1);
alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1);
}
}
int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(REF_ALLELE);
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(refAllele);
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) );
gb.AD(counts);
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }

View File

@ -291,8 +291,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
int[][] table = new int[2][2];
for ( String sample : stratifiedContexts.keySet() ) {
final AlignmentContext context = stratifiedContexts.get(sample);
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final AlignmentContext context = sample.getValue();
if ( context == null )
continue;
@ -313,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele,Double> entry : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (entry.getKey().isReference())
refLikelihood = entry.getValue();
else {
double like = el.get(a);
double like = entry.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map;
}
private class HaplotypeComparator implements Comparator<Haplotype> {
private static class HaplotypeComparator implements Comparator<Haplotype> {
public int compare(Haplotype a, Haplotype b) {
if (a.getQualitySum() < b.getQualitySum())
@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
// Score all the reads in the pileup, even the filtered ones
final double[] scores = new double[el.size()];
int i = 0;
for (Allele a : el.keySet()) {
scores[i++] = -el.get(a);
for (Map.Entry<Allele, Double> a : el.entrySet()) {
scores[i++] = -a.getValue();
if (DEBUG) {
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
}

View File

@ -61,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
// by design, first element in LinkedHashMap was ref allele
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
for (Allele a : el.keySet()) {
for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.isReference())
refLikelihood =el.get(a);
if (a.getKey().isReference())
refLikelihood = a.getValue();
else {
double like = el.get(a);
double like = a.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}

View File

@ -87,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
for (Allele a : el.keySet()) {
if (a.isReference())
refLikelihood = el.get(a);
for (Map.Entry<Allele,Double> a : el.entrySet()) {
if (a.getKey().isReference())
refLikelihood = a.getValue();
else {
double like = el.get(a);
double like = a.getValue();
if (like >= altLikelihood)
altLikelihood = like;
}
@ -100,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
final int numAlignedBases = getNumAlignedBases(p.getRead());
int rp = readPos;
if (readPos > numAlignedBases / 2) {
readPos = numAlignedBases - (readPos + 1);
}

View File

@ -66,8 +66,8 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
return map;
}
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
public static final VCFInfoHeaderLine[] descriptions = {
protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
protected static final VCFInfoHeaderLine[] descriptions = {
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -142,9 +141,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle"));
hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle"));
// Open output file specified by output VCF ROD
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
if ( comp.isBound() ) {
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
@ -250,8 +246,6 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
// Beagle always produces genotype strings based on the strings we input in the likelihood file.
String refString = vc_input.getReference().getDisplayString();
if (refString.length() == 0) // ref was null
refString = Allele.NULL_ALLELE_STRING;
Allele bglAlleleA, bglAlleleB;

View File

@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
for ( Allele allele : preferredVC.getAlleles() ) {
String bglPrintString;
if (allele.isNoCall() || allele.isNull())
if (allele.isNoCall())
bglPrintString = "-";
else
bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele
@ -351,7 +351,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
}
public static class CachingFormatter {
private int maxCacheSize = 0;
private String format;
private LRUCache<Double, String> cache;
@ -379,7 +378,6 @@ public class ProduceBeagleInput extends RodWalker<Integer, Integer> {
}
public CachingFormatter(String format, int maxCacheSize) {
this.maxCacheSize = maxCacheSize;
this.format = format;
this.cache = new LRUCache<Double, String>(maxCacheSize);
}

View File

@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker<Integer, Integer> {
// write out the alleles at this site
for ( Allele allele : vc.getAlleles() ) {
beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" ");
beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" ");
}
// write out sample level genotypes

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.commandline.Gatherer;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import java.io.File;
import java.io.FileNotFoundException;
@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer {
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".original");
final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
else if (!RAC.NO_PLOTS) {
final File recal_out = new File(output.getName() + ".recal");
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES);
}
generalReport.output(outputFile);

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -103,13 +107,13 @@ import java.util.ArrayList;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
private RecalibrationTables recalibrationTables;
@ -132,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
*/
public void initialize() {
// TODO -- remove me after the 2.1 release
if ( getToolkit().getArguments().numberOfThreads > 1 )
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
// check for unsupported access
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
@ -143,12 +151,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION);
if (RAC.LIST_ONLY) {
RecalDataManager.listAvailableCovariates(logger);
RecalUtils.listAvailableCovariates(logger);
System.exit(0);
}
RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
@ -222,17 +230,17 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
if (readNotSeen(read)) {
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
RecalDataManager.parsePlatformForRead(read, RAC);
if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
RecalUtils.parsePlatformForRead(read, RAC);
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
continue;
}
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates));
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
}
if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
RecalDataManager.isColorSpaceConsistent(read, offset))
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
RecalUtils.isColorSpaceConsistent(read, offset))
recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap
}
countedSites++;
@ -242,9 +250,9 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
}
/**
* Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker.
* Initialize the reduce step by returning 0L
*
* @return returns A PrintStream created from the -recalFile filename argument specified to the walker
* @return returns 0L
*/
public Long reduceInit() {
return 0L;
@ -271,13 +279,16 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
public void onTraversalDone(Long result) {
logger.info("Calculating quantized quality scores...");
quantizeQualityScores();
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
if (!RAC.NO_PLOTS) {
logger.info("Generating recalibration plots...");
generatePlots();
}
logger.info("Writing recalibration report...");
generateReport();
logger.info("...done!");
logger.info("Processed: " + result + " sites");
}
@ -285,10 +296,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
else
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES);
}
@ -309,7 +320,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created");
}
RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output);
}
}

View File

@ -1,109 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jan 6, 2010
*
* An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed.
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public Datum(long numObservations, long numMismatches) {
this.numObservations = numObservations;
this.numMismatches = numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
synchronized void increment(final boolean isError) {
numObservations++;
numMismatches += isError ? 1:0;
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Datum))
return false;
Datum other = (Datum) o;
return numMismatches == other.numMismatches && numObservations == other.numObservations;
}
}

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends Datum {
private static final double UNINITIALIZED = -1.0;
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
//---------------------------------------------------------------------------------------------------------------
//
// constructors
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) {
numObservations = _numObservations;
numMismatches = _numMismatches;
estimatedQReported = reportedQuality;
empiricalQuality = UNINITIALIZED;
}
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
this.estimatedQReported = copy.estimatedQReported;
this.empiricalQuality = copy.empiricalQuality;
}
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
increment(other.numObservations, other.numMismatches);
estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
empiricalQuality = UNINITIALIZED;
}
@Override
public void increment(final boolean isError) {
super.increment(isError);
empiricalQuality = UNINITIALIZED;
}
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
protected final void calcEmpiricalQuality() {
empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public void setEstimatedQReported(final double estimatedQReported) {
this.estimatedQReported = estimatedQReported;
}
public final double getEstimatedQReported() {
return estimatedQReported;
}
public void setEmpiricalQuality(final double empiricalQuality) {
this.empiricalQuality = empiricalQuality;
}
public final double getEmpiricalQuality() {
if (empiricalQuality == UNINITIALIZED)
calcEmpiricalQuality();
return empiricalQuality;
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
public String stringForCSV() {
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
}
/**
* We don't compare the estimated quality reported because it may be different when read from
* report tables.
*
* @param o the other recal datum
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof RecalDatum))
return false;
RecalDatum other = (RecalDatum) o;
return super.equals(o) &&
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
}
}

View File

@ -29,6 +29,7 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import java.io.File;
import java.util.Collections;
@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection {
* reads which have had the reference inserted because of color space inconsistencies.
*/
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
/**
* CountCovariates and TableRecalibration accept a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection {
* their color space tag can not be recalibrated.
*/
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
/**
* The context covariate will use a context of this size to calculate it's covariate value for base mismatches
@ -174,44 +175,44 @@ public class RecalibrationArgumentCollection {
public File recalibrationReport = null;
public GATKReportTable generateReportTable() {
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
argumentsTable.addColumn("Argument");
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
argumentsTable.addRowID("covariate", true);
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
argumentsTable.addRowID("no_standard_covs", true);
argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
argumentsTable.addRowID("run_without_dbsnp", true);
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
argumentsTable.addRowID("solid_recal_mode", true);
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
argumentsTable.addRowID("solid_nocall_strategy", true);
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
argumentsTable.addRowID("mismatches_context_size", true);
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("insertions_default_quality", true);
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
argumentsTable.addRowID("low_quality_tail", true);
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
argumentsTable.addRowID("default_platform", true);
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
argumentsTable.addRowID("force_platform", true);
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
argumentsTable.addRowID("quantizing_levels", true);
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
argumentsTable.addRowID("keep_intermediate_files", true);
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
argumentsTable.addRowID("no_plots", true);
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
argumentsTable.addRowID("recalibration_report", true);
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
argumentsTable.addRowID("binary_tag_name", true);
argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
return argumentsTable;
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;

View File

@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@ -45,15 +47,17 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@Output(required = true)
private PrintStream out;
@Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false)
private int coverageThreshold = 20;
@Override
// Look to see if the region has sufficient coverage
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
// note the linear probability scale
int coverageThreshold = 20;
return Math.min((double) depth / coverageThreshold, 1);
return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
}
@ -74,9 +78,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
public Long reduce(final GenomeLoc value, Long reduce) {
if (value != null) {
out.println(value.toString());
return reduce++;
} else
return reduce;
reduce++;
}
return reduce;
}
@Override

View File

@ -264,7 +264,7 @@ class SampleStatistics {
return false;
// different contigs
if (read.getMateReferenceIndex() != read.getReferenceIndex())
if (!read.getMateReferenceIndex().equals(read.getReferenceIndex()))
return false;
// unmapped

View File

@ -104,7 +104,9 @@ public class BAMDiffableReader implements DiffableReader {
InputStream fstream = new BufferedInputStream(new FileInputStream(file));
if ( !BlockCompressedInputStream.isValidFile(fstream) )
return false;
new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length);
final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream);
BCIS.read(buffer, 0, BAM_MAGIC.length);
BCIS.close();
return Arrays.equals(buffer, BAM_MAGIC);
} catch ( IOException e ) {
return false;

View File

@ -224,7 +224,7 @@ public class DiffNode extends DiffValue {
// X=(A=A B=B C=(D=D))
String[] parts = tree.split("=", 2);
if ( parts.length != 2 )
throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts);
throw new ReviewedStingException("Unexpected tree structure: " + tree);
String name = parts[0];
String value = parts[1];

View File

@ -90,8 +90,10 @@ public class GATKReportDiffableReader implements DiffableReader {
public boolean canRead(File file) {
try {
final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX;
char[] buff = new char[HEADER.length()];
new FileReader(file).read(buff, 0, HEADER.length());
final char[] buff = new char[HEADER.length()];
final FileReader FR = new FileReader(file);
FR.read(buff, 0, HEADER.length());
FR.close();
String firstLine = new String(buff);
return firstLine.startsWith(HEADER);
} catch (IOException e) {

View File

@ -47,7 +47,10 @@ import java.util.List;
* <p>
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen.
* Several important notes:
* 1) if there are multiple variants that start at a site, it chooses one of them randomly.
* 2) when there are overlapping indels (but with different start positions) only the first will be chosen.
* 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions).
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
@ -102,16 +105,16 @@ public class FastaAlternateReference extends FastaReference {
String refBase = String.valueOf((char)ref.getBase());
// Check to see if we have a called snp
for ( VariantContext vc : tracker.getValues(variants) ) {
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
if ( vc.isFiltered() )
continue;
if ( vc.isSimpleDeletion()) {
deletionBasesRemaining = vc.getReference().length();
deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) {
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) {
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
}

View File

@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
EXACT,
POOL
EXACT
}
protected int N;

View File

@ -148,8 +148,8 @@ public class ConsensusAlleleCounter {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
for (Map.Entry<String, Integer> s : consensusIndelStrings.entrySet()) {
cList.add(new Pair<String, Integer>(s.getKey(), s.getValue()));
}
if (read.getAlignmentEnd() == loc.getStart()) {
@ -246,18 +246,19 @@ public class ConsensusAlleleCounter {
// get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding
if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
altAllele = Allele.create(ref.getBase(), false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
final String insertionBases = (char)ref.getBase() + s; // add reference padding
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
@ -267,7 +268,6 @@ public class ConsensusAlleleCounter {
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls) {
vcs.add(builder.make());

View File

@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
BOTH,
POOLSNP,
POOLINDEL,
POOLBOTH
GeneralPloidySNP,
GeneralPloidyINDEL,
BOTH
}
public enum GENOTYPING_MODE {

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
if (alleleList.isEmpty())
return null;
}
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty())
return null;
// start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
Allele refAllele = alleles.get(0);
int endLoc = loc.getStart() + refAllele.length()-1;
if (allelesArePadded)
endLoc++;
return endLoc;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref,
final GenomeLoc loc,
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser,
final UnifiedArgumentCollection UAC,
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
// ignore places where we don't have a variant
if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false);
return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else {
alleles.addAll(vc.getAlleles());
}
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
}
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
return alleles;
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements );
}
public class BAQedPileupElement extends PileupElement {
public static class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) {
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
}

View File

@ -26,11 +26,12 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
public class UnifiedArgumentCollection {
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
@ -42,13 +43,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
*/
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
/**
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
@ -57,26 +51,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
/**
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
* is the default).
*/
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
/**
* This argument allows you to emit low quality calls as filtered records.
*/
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@ -90,12 +64,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
/**
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
*/
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
public RodBinding<VariantContext> alleles;
/**
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
@ -107,18 +75,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*/
@Advanced
@Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
public int MAX_ALTERNATE_ALLELES = 3;
@Hidden
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
@Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false)
public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false;
// indel-related arguments
@ -139,19 +97,18 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
/**
* This argument informs the prior probability of having an indel at a site.
*/
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
@Hidden
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
@Advanced
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_CONTINUATION_PENALTY = 10;
@Hidden
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
@Advanced
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false)
public byte INDEL_GAP_OPEN_PENALTY = 45;
@Hidden
@ -181,7 +138,6 @@ public class UnifiedArgumentCollection {
Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only
sample-level error model
*/
@Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false)
public boolean IGNORE_LANE_INFO = false;
@ -263,7 +219,6 @@ public class UnifiedArgumentCollection {
uac.referenceSampleName = referenceSampleName;
uac.samplePloidy = samplePloidy;
uac.maxQualityScore = minQualityScore;
uac.maxQualityScore = maxQualityScore;
uac.phredScaledPrior = phredScaledPrior;
uac.minPower = minPower;
uac.minReferenceDepth = minReferenceDepth;
@ -276,5 +231,16 @@ public class UnifiedArgumentCollection {
return uac;
}
/** Default constructor: leaves every argument field at its declared default value. */
public UnifiedArgumentCollection() { }
/**
 * Copy-initializing constructor: seeds this collection from a
 * {@link StandardCallerArgumentCollection}, carrying over the arguments the
 * two collections share (genotyping and output modes, calling/emitting
 * confidence thresholds, heterozygosity, the alternate-allele cap, and the
 * GENOTYPE_GIVEN_ALLELES rod binding).
 *
 * @param SCAC the standard caller argument collection whose values are copied
 */
public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) {
    // Copy each shared argument field verbatim; the assignments are
    // independent of one another, so their order is immaterial.
    GenotypingMode = SCAC.GenotypingMode;
    OutputMode = SCAC.OutputMode;
    STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
    STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
    heterozygosity = SCAC.heterozygosity;
    MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
    alleles = SCAC.alleles;
}
}

View File

@ -82,7 +82,7 @@ import java.util.*;
* -o snps.raw.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* -dcov [50] \
* -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \
* [-L targets.interval_list]
* </pre>
*
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
*/
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();
@ -311,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// add the pool values for each genotype
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}
if (UAC.referenceSampleName != null) {
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));

Some files were not shown because too many files have changed in this diff Show More