Fix merge issues when incorporating new AF calculations changes

This commit is contained in:
Guillermo del Angel 2012-03-27 15:00:44 -04:00
commit 343a061b1c
26 changed files with 881 additions and 614 deletions

View File

@ -1,4 +1,4 @@
Copyright (c) 2011 The Broad Institute
Copyright (c) 2012 The Broad Institute
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation

View File

@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.math.NumberUtils;
import java.util.Arrays;
import java.util.Collection;
import java.util.TreeMap;
import java.util.*;
/**
* Holds values for a column in a GATK report table
*/
public class GATKReportColumn extends TreeMap<Object, Object> {
public class GATKReportColumn extends LinkedHashMap<Object, Object> {
final private String columnName;
final private Object defaultValue;
final private String format;

View File

@ -756,10 +756,6 @@ public class GATKReportTable {
*/
// Get the column widths for everything
HashMap<String, GATKReportColumnFormat> columnFormats = new HashMap<String, GATKReportColumnFormat>();
for (String columnName : columns.keySet()) {
columnFormats.put(columnName, columns.get(columnName).getColumnFormat());
}
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition
@ -787,7 +783,7 @@ public class GATKReportTable {
if (needsPadding) {
out.printf(" ");
}
out.printf(columnFormats.get(columnName).getNameFormat(), columnName);
out.printf(columns.get(columnName).getColumnFormat().getNameFormat(), columnName);
needsPadding = true;
}
@ -796,29 +792,31 @@ public class GATKReportTable {
out.printf("%n");
// Emit the table body
for (Object primaryKey : primaryKeyColumn) {
for (final Object primaryKey : primaryKeyColumn) {
needsPadding = false;
if (primaryKeyDisplay) {
out.printf(primaryKeyFormat, primaryKey);
needsPadding = true;
}
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
for (final Map.Entry<String, GATKReportColumn> entry : columns.entrySet()) {
final GATKReportColumn column = entry.getValue();
if (column.isDisplayable()) {
if (needsPadding) {
out.printf(" ");
out.print(" ");
}
String value = columns.get(columnName).getStringValue(primaryKey);
out.printf(columnFormats.get(columnName).getValueFormat(), value);
final String value = column.getStringValue(primaryKey);
out.printf(column.getColumnFormat().getValueFormat(), value);
needsPadding = true;
}
}
out.printf("%n");
out.println();
}
out.printf("%n");
out.println();
}
public int getNumRows() {

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -68,6 +69,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
public boolean SHOW_INDEL_PILEUPS = false;
@Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output")
public boolean SHOW_VERBOSE = false;
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
public List<RodBinding<Feature>> rods = Collections.emptyList();
@ -82,7 +86,10 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
if ( context.hasBasePileup() ) {
ReadBackedPileup basePileup = context.getBasePileup();
out.printf("%s %s%n", basePileup.getPileupString(ref.getBaseAsChar()), rods);
out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
if ( SHOW_VERBOSE )
out.printf(" %s", createVerboseOutput(basePileup));
out.println();
}
if ( context.hasExtendedEventPileup() ) {
@ -125,6 +132,24 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
return rodString;
}
/**
 * Renders a comma-separated summary of every element in the pileup, one
 * "readName:offset:readLength" entry per element.
 *
 * @param pileup the pileup whose elements are summarized
 * @return the joined summary string (empty when the pileup has no elements)
 */
private static String createVerboseOutput(final ReadBackedPileup pileup) {
    final StringBuilder builder = new StringBuilder();
    String separator = "";
    for ( final PileupElement element : pileup ) {
        builder.append(separator)
               .append(element.getRead().getReadName())
               .append(":")
               .append(element.getOffset())
               .append(":")
               .append(element.getRead().getReadLength());
        separator = ",";  // only between entries, never leading
    }
    return builder.toString();
}
@Override
public void onTraversalDone(Integer result) {

View File

@ -91,6 +91,25 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
this.cycle = cycle;
}
// Must overload hashCode and equals to properly work with GATKReportColumn
/** Hash consistent with equals(): mixes the cycle with the read group's hash. */
@Override
public int hashCode() {
    return 33 * cycle + readGroup.hashCode();
}
/** Two keys are equal when both the cycle and the read group match. */
@Override
public boolean equals(final Object o) {
    if ( this == o )
        return true;
    if ( o == null || getClass() != o.getClass() )
        return false;
    final TableKey that = (TableKey) o;
    return cycle == that.cycle && readGroup.equals(that.readGroup);
}
@Override
public int compareTo(final TableKey tableKey) {
final int scmp = readGroup.compareTo(tableKey.readGroup);

View File

@ -70,6 +70,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
* @return the alleles used for genotyping
*/
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
final double[][] log10AlleleFrequencyPriors,
final double[] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result);
}

View File

@ -25,6 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
import java.util.Arrays;
/**
* Created by IntelliJ IDEA.
* User: ebanks
@ -34,23 +39,54 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
*/
public class AlleleFrequencyCalculationResult {
// IMPORTANT NOTE:
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
final double[][] log10AlleleFrequencyLikelihoods;
final double[][] log10AlleleFrequencyPosteriors;
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
private double log10MLE;
private double log10MAP;
final private int[] alleleCountsOfMLE;
final private int[] alleleCountsOfMAP;
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
double log10LikelihoodOfAFzero = 0.0;
double log10PosteriorOfAFzero = 0.0;
// The posteriors seen, not including that of AF=0
// TODO -- better implementation needed here (see below)
private ArrayList<Double> log10PosteriorMatrixValues = new ArrayList<Double>(100000);
private Double log10PosteriorMatrixSum = null;
public AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
// These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
private double log10LikelihoodOfAFzero;
private double log10PosteriorOfAFzero;
/**
 * Creates a result holder sized for the given number of alternate alleles;
 * all values start in the reset() sentinel state.
 *
 * @param maxAltAlleles number of alternate alleles whose MLE/MAP counts are tracked
 */
public AlleleFrequencyCalculationResult(final int maxAltAlleles) {
alleleCountsOfMLE = new int[maxAltAlleles];
alleleCountsOfMAP = new int[maxAltAlleles];
reset();
}
/** @return the best (maximum) log10 likelihood recorded so far over all alternate alleles */
public double getLog10MLE() {
    return this.log10MLE;
}
/** @return the best (maximum) log10 posterior recorded so far over all alternate alleles */
public double getLog10MAP() {
    return this.log10MAP;
}
/**
 * Lazily computes, then caches, the log10 sum of every posterior value
 * recorded via updateMAPifNeeded().  Note this excludes the AF=0 posterior,
 * which is stored separately.
 */
public double getLog10PosteriorMatrixSum() {
    if ( log10PosteriorMatrixSum == null ) {
        // TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory;
        // TODO -- will discuss with the team what the best option is
        final double[] values = new double[log10PosteriorMatrixValues.size()];
        int idx = 0;
        for ( final Double value : log10PosteriorMatrixValues )
            values[idx++] = value;
        log10PosteriorMatrixSum = MathUtils.log10sumLog10(values);
    }
    return log10PosteriorMatrixSum;
}
/**
 * @return the per-allele counts backing the current MLE.
 * NOTE(review): this hands out the live internal array (mutated by
 * updateMLEifNeeded/reset) — callers must not modify it; confirm no caller does.
 */
public int[] getAlleleCountsOfMLE() {
    return this.alleleCountsOfMLE;
}
/**
 * @return the per-allele counts backing the current MAP.
 * NOTE(review): returns the live internal array (mutated by
 * updateMAPifNeeded/reset) — callers must not modify it; confirm no caller does.
 */
public int[] getAlleleCountsOfMAP() {
    return this.alleleCountsOfMAP;
}
public double getLog10LikelihoodOfAFzero() {
@ -60,4 +96,47 @@ public class AlleleFrequencyCalculationResult {
/** @return the log10 posterior probability that the site is monomorphic (AF=0 for all alternate alleles) */
public double getLog10PosteriorOfAFzero() {
    return this.log10PosteriorOfAFzero;
}
/**
 * Restores this holder to its pristine state so it can be reused for the next
 * site: sentinel values for all MLE/MAP quantities, zeroed allele-count arrays,
 * and a cleared (uncached) posterior-value store.
 */
public void reset() {
    log10MLE = log10MAP = log10LikelihoodOfAFzero = log10PosteriorOfAFzero = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
    Arrays.fill(alleleCountsOfMLE, 0);
    Arrays.fill(alleleCountsOfMAP, 0);
    log10PosteriorMatrixValues.clear();
    log10PosteriorMatrixSum = null;  // invalidate the lazy sum cache
}
/**
 * Records log10LofK as the new maximum-likelihood estimate when it beats the
 * current one, capturing the associated per-allele counts.
 *
 * @param log10LofK        candidate log10 likelihood for this allele-count configuration
 * @param alleleCountsForK the per-allele counts that produced log10LofK
 */
public void updateMLEifNeeded(final double log10LofK, final int[] alleleCountsForK) {
    if ( log10LofK > log10MLE ) {
        log10MLE = log10LofK;
        System.arraycopy(alleleCountsForK, 0, alleleCountsOfMLE, 0, alleleCountsForK.length);
    }
}
/**
 * Records every candidate posterior value (feeding getLog10PosteriorMatrixSum)
 * and promotes log10LofK to the MAP when it beats the current one, capturing
 * the associated per-allele counts.
 *
 * @param log10LofK        candidate log10 posterior for this allele-count configuration
 * @param alleleCountsForK the per-allele counts that produced log10LofK
 */
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
    log10PosteriorMatrixValues.add(log10LofK);  // always recorded, even when not the new MAP
    if ( log10LofK > log10MAP ) {
        log10MAP = log10LofK;
        System.arraycopy(alleleCountsForK, 0, alleleCountsOfMAP, 0, alleleCountsForK.length);
    }
}
/**
 * Stores the AF=0 likelihood; if the monomorphic configuration is now the most
 * likely one overall, the MLE and its counts (all zero) are updated to match.
 */
public void setLog10LikelihoodOfAFzero(final double value) {
    this.log10LikelihoodOfAFzero = value;
    if ( value > log10MLE ) {
        // AF=0 beats every polymorphic configuration seen so far
        log10MLE = value;
        Arrays.fill(alleleCountsOfMLE, 0);
    }
}
/**
 * Stores the AF=0 posterior; if the monomorphic configuration is now the most
 * probable one overall, the MAP and its counts (all zero) are updated to match.
 */
public void setLog10PosteriorOfAFzero(final double value) {
    this.log10PosteriorOfAFzero = value;
    if ( value > log10MAP ) {
        // AF=0 beats every polymorphic configuration seen so far
        log10MAP = value;
        Arrays.fill(alleleCountsOfMAP, 0);
    }
}
}

View File

@ -0,0 +1,286 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.*;
/**
* Code for determining which indels are segregating among the samples.
*
* This code is just a refactor of the original code from Guillermo in the UG.
*
* @author Mark DePristo
* @since 3/26/12
*/
public class ConsensusAlleleCounter {
final protected static Logger logger = Logger.getLogger(ConsensusAlleleCounter.class);
// Minimum number of supporting reads (summed over samples) before an indel allele is genotyped
private final int minIndelCountForGenotyping;
// When true, keep every allele passing the count threshold; otherwise keep only the single best one
private final boolean doMultiAllelicCalls;
// Minimum fraction of indel-bearing reads required within at least one sample for that sample to count
private final double minFractionInOneSample;
private final GenomeLocParser locParser;
public ConsensusAlleleCounter(final GenomeLocParser locParser,
final boolean doMultiAllelicCalls,
final int minIndelCountForGenotyping,
final double minFractionInOneSample) {
this.minIndelCountForGenotyping = minIndelCountForGenotyping;
this.doMultiAllelicCalls = doMultiAllelicCalls;
this.minFractionInOneSample = minFractionInOneSample;
this.locParser = locParser;
}
/**
* Returns a list of Alleles at this locus that may be segregating.
*
* @param ref         the reference context at the locus being examined
* @param contexts    per-sample alignment contexts carrying the indel pileups
* @param contextType which read orientation(s) to stratify the contexts by
* @return the merged set of alleles passing the count threshold (empty if none)
*/
public List<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType) {
final Map<String, Integer> consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType);
// logger.info("Alleles at " + ref.getLocus());
// for ( Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet() ) {
// logger.info(" " + elt.getValue() + " => " + elt.getKey());
// }
return consensusCountsToAlleles(ref, consensusIndelStrings);
}
//
// TODO -- WARNING DOESN'T WORK WITH REDUCED READS
//
// Tallies, across all samples, how many reads support each candidate indel.
// Insertions are keyed by their inserted bases; deletions by "D<length>".
private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType) {
final GenomeLoc loc = ref.getLocus();
HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
int insCount = 0, delCount = 0;
// quick check of total number of indels in pileup
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
insCount += indelPileup.getNumberOfInsertions();
delCount += indelPileup.getNumberOfDeletions();
}
// bail out early when neither event type can possibly reach the genotyping threshold
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return Collections.emptyMap();
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
final int nReadsOverall = indelPileup.getNumberOfElements();
// skip samples whose indel support is absent or below the per-sample fraction cutoff
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
// if ( nIndelReads > 0 )
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
continue;
// } else {
// logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
}
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
// 454 reads are excluded from consensus building
if (ReadUtils.is454Read(read)) {
continue;
}
/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
*/
String indelString = p.getEventBases();
if (p.isInsertion()) {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
}
if (read.getAlignmentEnd() == loc.getStart()) {
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
// In this case, the read could have any of the inserted bases and we need to build a consensus
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
// case 1: current insertion is prefix of indel in hash map
if (s.startsWith(indelString)) {
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.startsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else if (read.getAlignmentStart() == loc.getStart()+1) {
// opposite corner condition: read will start at current locus with an insertion
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
if (s.endsWith(indelString)) {
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is suffix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else {
// normal case: insertion somewhere in the middle of a read: add count to arrayList
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
cList.add(new Pair<String, Integer>(indelString,cnt+1));
}
// copy back arrayList into hashMap
// (a later duplicate key wins on put, so an incremented count replaces the stale one)
consensusIndelStrings.clear();
for (Pair<String,Integer> pair : cList) {
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
}
}
else if (p.isDeletion()) {
// deletions are keyed only by their length, e.g. "D3"
indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1);
}
}
}
return consensusIndelStrings;
}
// Converts the consensus string->count table into VariantContext alleles:
// each key passing the threshold becomes a 2-allele VC, and all VCs are
// merged so overlapping candidates produce one consistent allele set.
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
final Map<String, Integer> consensusIndelStrings) {
final GenomeLoc loc = ref.getLocus();
final Collection<VariantContext> vcs = new ArrayList<VariantContext>();
int maxAlleleCnt = 0;
Allele refAllele, altAllele;
for (final Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet()) {
final String s = elt.getKey();
final int curCnt = elt.getValue();
int stop = 0;
// if observed count is above minimum threshold, we will genotype this allele
if (curCnt < minIndelCountForGenotyping)
continue;
if (s.startsWith("D")) {
// get deletion length
final int dLen = Integer.valueOf(s.substring(1));
// get ref bases of accurate deletion
final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
if (Allele.acceptableAlleleBases(refBases)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
}
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(Arrays.asList(refAllele, altAllele));
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls)
vcs.add(builder.make());
else {
// single-allele mode: retain only the highest-count candidate seen so far
if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
}
}
}
if (vcs.isEmpty())
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
return mergedVC.getAlleles();
}
}

View File

@ -52,11 +52,9 @@ import java.util.*;
public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
private final int HAPLOTYPE_SIZE;
private final int minIndelCountForGenotyping;
private final boolean getAlleleListFromVCF;
private boolean DEBUG = false;
private final boolean doMultiAllelicCalls = true;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
@ -72,217 +70,30 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// gdebug removeme
// todo -cleanup
private GenomeLoc lastSiteVisited;
private ArrayList<Allele> alleleList;
private List<Allele> alleleList = new ArrayList<Allele>();
static {
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
}
public IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
}
public static ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser,
int minIndelCountForGenotyping, boolean doMultiAllelicCalls) {
Allele refAllele = null, altAllele = null;
GenomeLoc loc = ref.getLocus();
ArrayList<Allele> aList = new ArrayList<Allele>();
HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
int insCount = 0, delCount = 0;
// quick check of total number of indels in pileup
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
insCount += indelPileup.getNumberOfInsertions();
delCount += indelPileup.getNumberOfDeletions();
}
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return aList;
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
//SAMRecord read = p.getRead();
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
if (ReadUtils.is454Read(read)) {
continue;
}
/* if (DEBUG && p.isIndel()) {
System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
p.getEventLength(),p.getType().toString(), p.getEventBases());
}
*/
String indelString = p.getEventBases();
if (p.isInsertion()) {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
}
if (read.getAlignmentEnd() == loc.getStart()) {
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
// In this case, the read could have any of the inserted bases and we need to build a consensus
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
// case 1: current insertion is prefix of indel in hash map
if (s.startsWith(indelString)) {
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.startsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else if (read.getAlignmentStart() == loc.getStart()+1) {
// opposite corner condition: read will start at current locus with an insertion
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
if (s.endsWith(indelString)) {
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
}
else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
foundKey = true;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
cList.add(new Pair<String, Integer>(indelString,1));
}
else {
// normal case: insertion somewhere in the middle of a read: add count to arrayList
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
cList.add(new Pair<String, Integer>(indelString,cnt+1));
}
// copy back arrayList into hashMap
consensusIndelStrings.clear();
for (Pair<String,Integer> pair : cList) {
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
}
}
else if (p.isDeletion()) {
indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1);
}
}
}
Collection<VariantContext> vcs = new ArrayList<VariantContext>();
int maxAlleleCnt = 0;
String bestAltAllele = "";
for (String s : consensusIndelStrings.keySet()) {
int curCnt = consensusIndelStrings.get(s), stop = 0;
// if observed count if above minimum threshold, we will genotype this allele
if (curCnt < minIndelCountForGenotyping)
continue;
if (s.startsWith("D")) {
// get deletion length
int dLen = Integer.valueOf(s.substring(1));
// get ref bases of accurate deletion
int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
stop = loc.getStart() + dLen;
byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
if (Allele.acceptableAlleleBases(refBases)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
stop = loc.getStart();
}
else continue; // go on to next allele if consensus insertion has any non-standard base.
}
ArrayList vcAlleles = new ArrayList<Allele>();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(ref.getBase());
builder.noGenotypes();
if (doMultiAllelicCalls)
vcs.add(builder.make());
else {
if (curCnt > maxAlleleCnt) {
maxAlleleCnt = curCnt;
vcs.clear();
vcs.add(builder.make());
}
}
}
if (vcs.isEmpty())
return aList; // nothing else to do, no alleles passed minimum count criterion
VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
aList = new ArrayList<Allele>(mergedVC.getAlleles());
return aList;
/**
 * Delegates segregating-indel discovery to a freshly constructed
 * ConsensusAlleleCounter configured from the walker's argument collection;
 * multi-allelic calling is always enabled here (second constructor arg).
 */
private List<Allele> computeConsensusAlleles(ReferenceContext ref,
                                             Map<String, AlignmentContext> contexts,
                                             AlignmentContextUtils.ReadOrientation contextType,
                                             GenomeLocParser locParser) {
    return new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE)
            .computeConsensusAlleles(ref, contexts, contextType);
}
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
@ -338,7 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
} else {
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls);
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser);
if (alleleList.isEmpty())
return null;
}

View File

@ -1,209 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.commons.lang.NotImplementedException;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.security.cert.CertificateNotYetValidException;
import java.util.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
/**
 * Walker that annotates sites with 95% credible bounds on the allele frequency
 * (written to the two-valued "AFB" INFO field), computed from the exact AF
 * posterior over all input variant tracks.
 *
 * NOTE(review): this walker is explicitly unsupported -- boundAlleleFrequency()
 * throws ReviewedStingException, so map() will fail on any biallelic record.
 * The fixes below address clear defects without claiming to make the walker
 * production-ready.
 *
 * User: chartl
 * Date: 8/30/11
 * Time: 10:08 AM
 */
public class UGBoundAF extends RodWalker<VariantContext,Integer> {

    @Output(shortName="vcf",fullName="VCF",doc="file to write to",required=true)
    VCFWriter writer;

    @Input(shortName="V",fullName="Variants",doc="variant tracks to use in calculation",required=true)
    List<RodBinding<VariantContext>> variants;

    // Smallest allele frequency treated as distinguishable from zero in the posterior math.
    private static double EPS_LOWER_LIMIT = Math.pow(10,-6.0);
    // NOTE(review): currently unused; presumably intended for caching epsilon-posterior bounds.
    private HashMap<Integer,Pair<Double,Double>> epsilonPosteriorCache = new HashMap<Integer,Pair<Double,Double>>(8192);
    // Cache of the AC=0 posterior normalization coefficient, keyed by allele number (AN).
    private HashMap<Integer,Double> logAC0Cache = new HashMap<Integer, Double>(8192);
    // NOTE(review): currently unused; presumably the [0,1] bin count for the greedy bound search.
    private int QUANTIZATION_FACTOR = 1000;

    /**
     * Builds the output VCF header from the union of all input-track header lines
     * plus the AFB INFO line this walker adds.
     */
    public void initialize() {
        Set<VCFHeaderLine> allHeaderLines = new HashSet<VCFHeaderLine>(1024);
        for ( RodBinding<VariantContext> v : variants ) {
            String trackName = v.getName();
            Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
            // FIX: these header lines were previously computed into a local that was never
            // accumulated, so the output header silently dropped all input-track metadata.
            allHeaderLines.addAll(vcfHeaders.get(trackName).getMetaData());
        }
        allHeaderLines.add(new VCFInfoHeaderLine("AFB",2,VCFHeaderLineType.Float,"The 95% bounds on the allele "+
                "frequency. First value=95% probability AF>x. Second value=95% probability AF<x."));
        writer.writeHeader(new VCFHeader(allHeaderLines));
    }

    public Integer reduceInit() {
        return 0;
    }

    /**
     * For each site, either flags multi-allelic records (filtered, no AFB) or computes the
     * AFB bound from the pooled genotype likelihoods of all overlapping variant contexts.
     *
     * @return the annotated site-only VariantContext, or null if no variants overlap
     */
    @Override
    public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext unused ) {
        List<VariantContext> allVariants = tracker.getValues(variants);
        if ( allVariants.size() == 0 ) {
            return null;
        }
        List<Allele> alternateAlleles = getAllAlternateAlleles(allVariants);
        // Site-only context (empty sample subset) used as the template for the output record.
        VariantContextBuilder builder = new VariantContextBuilder(allVariants.get(0).subContextFromSamples(new TreeSet<String>()));
        if ( alternateAlleles.size() > 1 ) {
            // Multiple segregating alts are not handled; emit a filtered record instead.
            logger.warn("Multiple Segregating Variants at position "+ref.getLocus().toString());
            alternateAlleles.add(allVariants.get(0).getReference());
            builder.alleles(alternateAlleles);
            builder.filters(String.format("MULTIPLE_SEGREGATING[%s]", Utils.join(",",alternateAlleles)));
        } else {
            // Pool all genotype likelihoods across tracks; no-calls enter as flat (0,0,0) GLs.
            GenotypesContext context = GenotypesContext.create();
            int numNoCall = 0;
            for ( VariantContext v : allVariants ) {
                numNoCall += v.getNoCallCount();
                context.addAll(v.getGenotypes());
            }
            builder.attribute("AFB",boundAlleleFrequency(getACPosteriors(context)));
        }
        return builder.make();
    }

    /** Collects the alternate alleles of every overlapping variant context (duplicates possible). */
    private List<Allele> getAllAlternateAlleles(List<VariantContext> variants) {
        List<Allele> alleles = new ArrayList<Allele>(3); // some overhead
        for ( VariantContext v : variants ) {
            alleles.addAll(v.getAlternateAlleles());
        }
        return alleles;
    }

    @Override
    public Integer reduce(VariantContext value, Integer sum) {
        if ( value == null )
            return sum;
        writer.add(value);
        return ++sum;
    }

    private int N_ITERATIONS = 1;

    /**
     * Runs the exact AF calculation with flat (uniform, all-zero log10) priors and returns
     * the log10 AC posterior vector for the single alternate allele.
     */
    private double[] getACPosteriors(GenotypesContext gc) {
        // note this uses uniform priors (!)
        double[][] zeroPriors = new double[1][1+2*gc.size()];
        AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2,2*gc.size());
        // todo -- allow multiple alleles here
        for ( int i = 0; i < N_ITERATIONS; i ++ ) {
            ExactAFCalculationModel.linearExactMultiAllelic(gc, 2, zeroPriors, result, false);
        }
        return result.log10AlleleFrequencyPosteriors[0];
    }

    /**
     * Intended to find the shortest interval [x,y] with 95% posterior mass on AF.
     * Unimplemented by design; always throws.
     */
    private String boundAlleleFrequency(double[] ACLikelihoods) {
        // note that no-calls are unnecessary: the ML likelihoods take nocalls into account as 0,0,0 GLs
        // thus, for sites with K 100,40,0 likelihoods and M no-calls, the likelihoods will be
        // agnostic between 2*K alleles through 2*(K+M) alleles - exactly what we want to marginalize over
        // want to pick a lower limit x and upper limit y such that
        // int_{f = x to y} sum_{c = 0 to 2*AN} P(AF=f | c, AN) df = 0.95
        // int_{f=x to y} calculateAFPosterior(f) df = 0.95
        // and that (y-x) is minimized
        // this is done by quantizing [0,1] into small bins and, since the distribution is
        // unimodal, greedily adding them until the probability is >= 0.95
        throw new ReviewedStingException("This walker is unsupported, and is not fully implemented", new NotImplementedException("bound allele frequency not implemented"));
    }

    /**
     * Marginalizes the per-AC posterior density at frequency af over all allele counts.
     *
     * NOTE(review): likelihoods[c] is never folded into probLiks[c]; the AC likelihood weights
     * appear to be intended as an additive log10 term here -- confirm before relying on this.
     */
    private double calculateAFPosterior(double[] likelihoods, double af) {
        double[] probLiks = new double[likelihoods.length];
        for ( int c = 0; c < likelihoods.length; c++) {
            probLiks[c] = calculateAFPosterior(c,likelihoods.length,af);
        }
        return MathUtils.log10sumLog10(probLiks);
    }

    /**
     * Evaluates the log10 allele frequency posterior density at AF given AC observations
     * of N chromosomes; AC in {0,1,2} use closed forms, larger AC a gamma approximation.
     */
    private double calculateAFPosterior(int ac, int n, double af) {
        // evaluate the allele frequency posterior distribution at AF given AC observations of N chromosomes
        switch ( ac ) {
            case 0:
                return logAC0Coef(n) + n*Math.log10(1 - af) - Math.log10(af);
            case 1:
                return Math.log10(n) + (n-1)*Math.log10(1-af) - n*Math.log10(1-EPS_LOWER_LIMIT);
            case 2:
                return Math.log10(n) + Math.log10(n-1) + Math.log10(af) + (n-2)*Math.log10(1-af) - Math.log10(1-(n-1)*EPS_LOWER_LIMIT) - (n-1)*Math.log10(EPS_LOWER_LIMIT);
            default:
                return (ac-1)*Math.log10(af)+ac*Math.log10( (double) n-ac)-(n-ac)*af*Math.log10(Math.E) - MathUtils.log10Gamma(ac);
        }
    }

    /**
     * Log10 normalization coefficient for the AC=0 posterior at allele number an, computed
     * once per an and cached (alternating-sign binomial series with an epsilon correction).
     */
    private double logAC0Coef(int an) {
        if ( ! logAC0Cache.containsKey(an) ) {
            double coef = -Math.log10(EPS_LOWER_LIMIT);
            for ( int k = 1; k <= an; k++ ) {
                // note this should typically just be
                // term = ( 1 - Math.pow(EPS_LOWER_LIMIT,k) ) * MathUtils.binomialCoefficient(an,k) / k
                // but the 1-E term will just be 1, so we do the following to mitigate this problem
                double binom = MathUtils.binomialCoefficient(an,k);
                double eps_correction = EPS_LOWER_LIMIT*Math.pow(binom,1/k);
                double term = binom/k - Math.pow(eps_correction,k);
                if ( k % 2 == 0 ) {
                    coef += term;
                } else {
                    coef -= term;
                }
            }
            logAC0Cache.put(an,coef);
        }
        return logAC0Cache.get(an);
    }

    /**
     * Adaptive Simpson quadrature of the AF posterior over [start, stop] with error
     * tolerance err and recursion cap.
     *
     * NOTE(review): calculateAFPosterior returns log10 densities, yet they are combined
     * with linear-space Simpson weights here -- confirm the intended space before use.
     */
    private double adaptiveSimpson(double[] likelihoods, double start, double stop, double err, int cap) {
        final double mid = (start + stop)/2;
        final double size = stop-start;
        final double fStart = calculateAFPosterior(likelihoods,start);
        final double fMid = calculateAFPosterior(likelihoods,mid);
        final double fStop = calculateAFPosterior(likelihoods,stop);
        // FIX: Simpson's rule weights the midpoint by 4; the previous code weighted the
        // right endpoint instead, and passed the midpoint/endpoint values to simpAux in
        // swapped slots relative to simpAux's (fa=f(a), fb=f(b), fc=f(mid)) convention.
        final double s = (size/6)*(fStart + 4*fMid + fStop);
        return simpAux(likelihoods,start,stop,err,s,fStart,fStop,fMid,cap);
    }

    /**
     * Recursive helper for adaptiveSimpson. Convention: fa=f(a), fb=f(b), fc=f((a+b)/2),
     * s = Simpson estimate over [a,b]. Subdivides until the Richardson error estimate
     * |S2-S| <= 15*eps or the recursion cap is exhausted.
     */
    private double simpAux(double[] likelihoods, double a,double b,double eps,double s,double fa,double fb,double fc,double cap){
        if ( s == 0 )
            return -300.0; // effectively log10(0) for an empty-mass interval
        double c = ( a + b )/2;
        double h = b-a;
        double d = (a + c)/2;
        double e = (c + b)/2;
        double fd = calculateAFPosterior(likelihoods, d);
        double fe = calculateAFPosterior(likelihoods, e);
        double s_l = (h/12)*(fa + 4*fd + fc);
        double s_r = (h/12)*(fc + 4*fe + fb);
        double s_2 = s_l + s_r;
        if ( cap <= 0 || Math.abs(s_2 - s) <= 15*eps ){
            // Richardson extrapolation term (S2-S)/15 sharpens the composite estimate.
            return Math.log10(s_2 + (s_2 - s)/15.0);
        }
        return MathUtils.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
    }
}

View File

@ -118,6 +118,17 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
/**
* Complementary argument to minIndelCnt. Only samples with at least this fraction of indel-containing reads will contribute
* to counting and overcoming the threshold minIndelCnt. This parameter ensures that in deep data you don't end
* up summing lots of super rare errors up to overcome the 5 read default threshold. Should work equally well for
* low-coverage and high-coverage samples, as low coverage samples with any indel containing reads should easily over
* come this threshold.
*/
@Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
/**
* This argument informs the prior probability of having an indel at a site.
*/
@ -165,6 +176,7 @@ public class UnifiedArgumentCollection {
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;

View File

@ -137,6 +137,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
@Hidden
@Argument(fullName = "debug_file", shortName = "debug_file", doc = "File to print all of the annotated and detailed debugging output", required = false)
protected PrintStream verboseWriter = null;
@ -218,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// initialize the verbose writer
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());

View File

@ -84,8 +84,8 @@ public class UnifiedGenotyperEngine {
private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[][] log10AlleleFrequencyPriorsSNPs;
private final double[][] log10AlleleFrequencyPriorsIndels;
private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
// the priors object
private final GenotypePriors genotypePriorsSNPs;
@ -131,9 +131,8 @@ public class UnifiedGenotyperEngine {
this.annotationEngine = engine;
this.N = N;
log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1];
log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP);
@ -269,8 +268,8 @@ public class UnifiedGenotyperEngine {
// initialize the data for this thread if that hasn't been done yet
if ( afcm.get() == null ) {
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
posteriorsArray.set(new double[N + 2]);
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES));
posteriorsArray.set(new double[2]);
}
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
@ -283,9 +282,7 @@ public class UnifiedGenotyperEngine {
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
}
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
AFresult.reset();
List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
// is the most likely frequency conformation AC=0 for all alternate alleles?
@ -300,12 +297,11 @@ public class UnifiedGenotyperEngine {
// the genotyping model may have stripped it out
if ( indexOfAllele == -1 )
continue;
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1]);
// if the most likely AC is not 0, then this is a good alternate allele to use;
// make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array
if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
int indexOfBestAC = AFresult.getAlleleCountsOfMAP()[indexOfAllele-1];
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) {
myAlleles.add(alternateAllele);
bestGuessIsRef = false;
}
@ -316,7 +312,6 @@ public class UnifiedGenotyperEngine {
}
// calculate p(f>0):
// because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
final double PofF = 1.0 - normalizedPosteriors[0];
@ -324,18 +319,11 @@ public class UnifiedGenotyperEngine {
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero;
phredScaledConfidence = -10.0 * AFresult.getLog10PosteriorOfAFzero();
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) {
double sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += AFresult.log10AlleleFrequencyPosteriors[0][i];
}
final double sum = AFresult.getLog10PosteriorMatrixSum();
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
}
}
@ -364,7 +352,7 @@ public class UnifiedGenotyperEngine {
// print out stats if we have a writer
if ( verboseWriter != null && !limitedContext )
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, model);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
final HashMap<String, Object> attributes = new HashMap<String, Object>();
@ -378,29 +366,27 @@ public class UnifiedGenotyperEngine {
// the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
AFresult.reset();
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, alternateAllelesToUse, false, model);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
AFresult.reset();
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -437,9 +423,9 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) {
normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero;
System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1);
public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum();
return MathUtils.normalizeFromLog10(normalizedPosteriors);
}
@ -499,14 +485,6 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts;
}
protected static void clearAFarray(double[][] AFs) {
for ( int i = 0; i < AFs.length; i++ ) {
for ( int j = 0; j < AFs[i].length; j++ ) {
AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
}
private final static double[] binomialProbabilityDepthCache = new double[10000];
static {
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
@ -551,7 +529,7 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
}
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors, final GenotypeLikelihoodsCalculationModel.Model model) {
protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, final GenotypeLikelihoodsCalculationModel.Model model) {
Allele refAllele = null, altAllele = null;
for ( Allele allele : vc.getAlleles() ) {
if ( allele.isReference() )
@ -574,11 +552,8 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
AFline.append("0.00000000\t");
else
AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]));
AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MLE()));
AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().getLog10MAP()));
verboseWriter.println(AFline.toString());
}
@ -648,31 +623,29 @@ public class UnifiedGenotyperEngine {
return null;
}
protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) {
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
// the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i
for (int alleles = 1; alleles <= priors.length; alleles++) {
double sum = 0.0;
double sum = 0.0;
// for each i
for (int i = 1; i <= N; i++) {
double value = Math.pow(theta, alleles) / (double)i;
priors[alleles-1][i] = Math.log10(value);
sum += value;
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[alleles-1][0] = Math.log10(1.0 - sum);
// for each i
for (int i = 1; i <= N; i++) {
final double value = theta / (double)i;
priors[i] = Math.log10(value);
sum += value;
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
priors[0] = Math.log10(1.0 - sum);
}
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
if (model.name().toUpperCase().contains("SNP"))
return log10AlleleFrequencyPriorsSNPs;
else if (model.name().toUpperCase().contains("INDEL"))
return log10AlleleFrequencyPriorsIndels;
else
throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {

View File

@ -25,19 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
import org.broadinstitute.sting.gatk.walkers.genotyper.AlleleFrequencyCalculationResult;
import org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
public class GLBasedSampleSelector extends SampleSelector {
Map<Integer,double[][]> numAllelePriorMatrix = new HashMap<Integer,double[][]>();
double[] flatPriors = null;
double referenceLikelihood;
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
super(sm);
@ -53,9 +47,11 @@ public class GLBasedSampleSelector extends SampleSelector {
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?
double[][] flatPrior = createFlatPrior(vc.getAlleles());
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size(),2*samples.size());
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPrior,result,true);
if ( flatPriors == null ) {
flatPriors = new double[1+2*samples.size()];
}
AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(vc.getAlternateAlleles().size());
ExactAFCalculationModel.linearExactMultiAllelic(subContext.getGenotypes(),vc.getAlternateAlleles().size(),flatPriors,result);
// do we want to let this qual go up or down?
if ( result.getLog10PosteriorOfAFzero() < referenceLikelihood ) {
return true;
@ -63,12 +59,4 @@ public class GLBasedSampleSelector extends SampleSelector {
return false;
}
private double[][] createFlatPrior(List<Allele> alleles) {
if ( ! numAllelePriorMatrix.containsKey(alleles.size()) ) {
numAllelePriorMatrix.put(alleles.size(), new double[alleles.size()][1+2*samples.size()]);
}
return numAllelePriorMatrix.get(alleles.size());
}
}

View File

@ -371,18 +371,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// find the comp
final VariantContext comp = findMatchingComp(eval, compSet);
HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>();
for ( VariantStratifier vs : stratificationObjects ) {
List<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
stateMap.put(vs, states);
}
ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);
HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys);
for ( StateKey stateKey : stateKeysHash ) {
for ( StateKey stateKey : getApplicableStates(tracker, ref, eval, evalRod.getName(), comp, compRod.getName(), sampleName) ) {
NewEvaluationContext nec = evaluationContexts.get(stateKey);
// eval against the comp
@ -410,6 +399,73 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return null;
}
// private Iterable<StateKey> getApplicableStates(final RefMetaDataTracker tracker,
// final ReferenceContext ref,
// final VariantContext eval,
// final String evalName,
// final VariantContext comp,
// final String compName,
// final String sampleName ) {
// Set<StateKey> oldKeys = new HashSet<StateKey>(Utils.makeCollection(getApplicableStatesOld(tracker, ref, eval, evalName, comp, compName, sampleName)));
//
// int n = 0;
// for ( final StateKey newKey : getApplicableStatesNew(tracker, ref, eval, evalName, comp, compName, sampleName) ) {
// n++;
// if ( ! oldKeys.contains(newKey) )
// throw new ReviewedStingException("New key " + newKey + " missing from previous algorithm");
// }
//
// if ( n != oldKeys.size() )
// throw new ReviewedStingException("New keyset has " + n + " elements but previous algorithm had " + oldKeys.size());
//
// return oldKeys;
// }
// private Iterable<StateKey> getApplicableStatesNew(final RefMetaDataTracker tracker,
// final ReferenceContext ref,
// final VariantContext eval,
// final String evalName,
// final VariantContext comp,
// final String compName,
// final String sampleName ) {
// // todo -- implement optimized version
// }
/**
* Given specific eval and comp VCs and the sample name, return an iterable
* over all of the applicable state keys.
*
* See header of StateKey for performance problems...
*
* @param tracker
* @param ref
* @param eval
* @param evalName
* @param comp
* @param compName
* @param sampleName
* @return
*/
private Iterable<StateKey> getApplicableStates(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final VariantContext eval,
final String evalName,
final VariantContext comp,
final String compName,
final String sampleName ) {
// Ask each active stratifier which of its states this eval/comp pair belongs to.
final HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>(stratificationObjects.size());
for ( final VariantStratifier vs : stratificationObjects ) {
List<String> states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);
stateMap.put(vs, states);
}
// Expand the per-stratifier state lists into the cross-product of concrete StateKeys.
ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);
// De-duplicate: the expansion can emit the same key more than once.
return new HashSet<StateKey>(stateKeys);
}
@Requires({"comp != null", "evals != null"})
private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
// find all of the matching comps
@ -482,11 +538,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public void onTraversalDone(Integer result) {
logger.info("Finalizing variant report");
for ( StateKey stateKey : evaluationContexts.keySet() ) {
NewEvaluationContext nec = evaluationContexts.get(stateKey);
for ( Map.Entry<StateKey, NewEvaluationContext> ecElt : evaluationContexts.entrySet() ) {
final StateKey stateKey = ecElt.getKey();
final NewEvaluationContext nec = ecElt.getValue();
for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
ve.finalizeEvaluation();
final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName();
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
Map<Field, DataPoint> datamap = scanner.getData();
@ -498,7 +556,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (field.get(ve) instanceof TableType) {
TableType t = (TableType) field.get(ve);
final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final String subTableName = veName + "." + field.getName();
final DataPoint dataPointAnn = datamap.get(field);
GATKReportTable table;
@ -539,11 +597,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
}
} else {
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
GATKReportTable table = report.getTable(veName);
for ( VariantStratifier vs : stratificationObjects ) {
String columnName = vs.getName();
final String columnName = vs.getName();
table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
}

View File

@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
public abstract class VariantEvaluator {
private VariantEvalWalker walker;
private final String simpleName;
protected VariantEvaluator() {
this.simpleName = getClass().getSimpleName();
}
public void initialize(VariantEvalWalker walker) {
this.walker = walker;
@ -90,4 +95,8 @@ public abstract class VariantEvaluator {
protected static String formattedRatio(final int num, final int denom) {
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
}
public String getSimpleName() {
return simpleName;
}
}

View File

@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@ -40,6 +41,7 @@ import java.util.Map;
* the object, a Mashalling object can serialize or deserialize a analysis module.
*/
public class AnalysisModuleScanner {
final private static Map<String, Annotation[]> annotationCache = new HashMap<String, Annotation[]>();
// what we extracted from the class
private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
@ -84,12 +86,22 @@ public class AnalysisModuleScanner {
// get the fields from the class, and extract
for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
for (Field f : superCls.getDeclaredFields())
for (Annotation annotation : f.getAnnotations()) {
for (Annotation annotation : getAnnotations(f)) {
if (annotation.annotationType().equals(DataPoint.class))
datums.put(f,(DataPoint) annotation);
}
}
}
/**
 * Returns the annotations of the given field, memoized in the static annotationCache
 * keyed by Field.toString() -- presumably because module scanning revisits the same
 * fields repeatedly and reflective annotation lookup is comparatively expensive.
 */
private Annotation[] getAnnotations(final Field field) {
// Field.toString() encodes declaring class + type + name, so it is a stable cache key.
final String fieldName = field.toString();
Annotation[] annotations = annotationCache.get(fieldName);
if ( annotations == null ) {
annotations = field.getAnnotations();
annotationCache.put(fieldName, annotations);
}
return annotations;
}
/**
*

View File

@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
return new TreeMap<String, VariantEvaluator>(evaluationInstances);
}
/**
 * Builds a StateKey for this context by mapping each stratifier's name to its
 * current state string (this class maps VariantStratifier -> state value).
 */
public StateKey makeStateKey() {
Map<String, String> map = new HashMap<String, String>(size());
for (Map.Entry<VariantStratifier, String> elt : this.entrySet() ) {
// Key by the stratifier's name rather than the object itself.
map.put(elt.getKey().getName(), elt.getValue());
}
return new StateKey(map);
}
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
// the other updateN methods don't see a null context

View File

@ -3,25 +3,102 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
import java.util.Map;
import java.util.TreeMap;
public class StateKey extends TreeMap<String, String> {
// public int hashCode() {
// int hashCode = 1;
//
// for (final Map.Entry<String,String> pair : this.entrySet()) {
// hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode();
// }
//
// return hashCode;
// }
/**
* A final constant class representing the specific state configuration
* for a VariantEvaluator instance.
*
* The way this is currently implemented is by a map from the name of a VariantStratification to a
* specific state string. For example, the stratification Novelty has states all, known, novel. A
* specific variant and comp would be tagged as "known" by the stratification, and this could be
* represented here by the map (Novelty -> known).
*
* TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
* TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
* TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
* TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
* TODO -- PERFORMANCE PROBLEM -- MAD 03/27/12
*
* I've been staring at this state key code for a while. It's just not right, and expensive to boot.
* Here are my thoughts for future work. The state key is both a key with specific state values for
* every stratification. For example, (known, sample1, ac=1). This capability is used in some places,
* such as below, to return a set of all states that should be updated given the eval and comp
* VCs. In principle there are a finite set of such combinations (the product of all states for all active
* stratifications at initialization). We could represent such keys as integers into the set of all combinations.
*
* Note that all of the code that manipulates these things is just terrible. It's all string manipulation and
* HashMaps. Since we are effectively always squaring off our VE analyses (i.e., we have a table with
* all variable values for all stratification combinations) it doesn't make sense to allow so much dynamism. Instead
* we should just upfront create a giant table indexed by integer keys, and manage data via a simple map from
* specific strat state to this key.
*
* The reason this is so important is that >80% of the runtime of VE with VCFs with >1000 samples is spent in
* the initializeStateKey function. Instead, we should have code that looks like:
*
* init:
* allStates <- initializeCombinationalStateSpace
*
* map:
* for each eval / comp pair:
* for each relevantState based on eval / comp:
* allStates[relevantState].update(eval, comp)
*
*
*/
public final class StateKey {
/** High-performance cache of the toString operation for a constant class */
private final String string;
private final TreeMap<String, String> states;
public String toString() {
String value = "";
public StateKey(final Map<String, String> states) {
this.states = new TreeMap<String, String>(states);
this.string = formatString();
}
for ( final String key : this.keySet() ) {
//value += "\tstate " + key + ":" + this.get(key) + "\n";
value += String.format("%s:%s;", key, this.get(key));
public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) {
if ( toOverride == null ) {
this.states = new TreeMap<String, String>();
} else {
this.states = new TreeMap<String, String>(toOverride.states);
}
return value;
this.states.put(keyOverride, valueOverride);
this.string = formatString();
}
/**
 * Two StateKeys are equal iff their underlying state maps are equal;
 * the cached string representation is derived from the map and ignored.
 */
@Override
public boolean equals(final Object o) {
    if ( this == o )
        return true;
    if ( o == null || getClass() != o.getClass() )
        return false;

    final StateKey that = (StateKey) o;
    return states == null ? that.states == null : states.equals(that.states);
}
@Override
public int hashCode() {
    // Delegate to the underlying sorted state map; consistent with equals(),
    // which also compares only the states map.
    return states.hashCode();
}
@Override
public String toString() {
    // Return the representation precomputed by formatString() in the
    // constructors; StateKey is immutable, so the cached string never goes
    // stale.  This is the "high-performance cache" noted on the field.
    return string;
}
/**
 * Formats the state map as "key1:value1;key2:value2;...", in the TreeMap's
 * sorted key order.  Called once per constructor to populate the cached
 * {@code string} field.
 *
 * Uses direct StringBuilder appends instead of per-entry String.format: the
 * output is identical, but it avoids format-string parsing on every entry in
 * a class whose javadoc already flags it as a performance hot spot.
 *
 * @return the canonical string form of this StateKey
 */
private final String formatString() {
    final StringBuilder repr = new StringBuilder();
    for ( final Map.Entry<String, String> state : states.entrySet() )
        repr.append(state.getKey()).append(':').append(state.getValue()).append(';');
    return repr.toString();
}
// TODO -- might be slow because of tree map
public String get(final String key) {
    // Look up the state value recorded for a stratification name,
    // e.g. "Novelty" -> "known" (see class javadoc for the state model).
    return states.get(key);
}
}

View File

@ -214,20 +214,9 @@ public class VariantEvalUtils {
ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
}
} else {
HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
StateKey stateKey = new StateKey();
for (VariantStratifier vs : ec.keySet()) {
String state = ec.get(vs);
stateKey.put(vs.getName(), state);
}
final StateKey stateKey = ec.makeStateKey();
ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);
necs.put(stateKey, ec);
return necs;
return new HashMap<StateKey, NewEvaluationContext>(Collections.singletonMap(stateKey, ec));
}
return ecs;
@ -428,14 +417,8 @@ public class VariantEvalUtils {
HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
for (String state : oneSetOfStates.get(vs)) {
StateKey newStateKey = new StateKey();
if (stateKey != null) {
newStateKey.putAll(stateKey);
}
newStateKey.put(vs.getName(), state);
for (final String state : oneSetOfStates.get(vs)) {
final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state);
initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
}
} else {

View File

@ -182,7 +182,6 @@ public class Haplotype {
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
final int haplotypeSize, final int numPrefBases) {
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
Allele refAllele = null;
@ -215,13 +214,13 @@ public class Haplotype {
// Create location for all haplotypes
int startLoc = ref.getWindow().getStart() + startIdxInReference;
int stopLoc = startLoc + haplotypeSize-1;
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
final int stopLoc = startLoc + haplotypeSize-1;
GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
for (Allele a : alleleList) {
for (final Allele a : alleleList) {
byte[] alleleBases = a.getBases();
// use string concatenation
@ -315,5 +314,4 @@ public class Haplotype {
return (fallsInsideDeletion ? -1 : readBases);
}
}

View File

@ -234,6 +234,9 @@ public class MathUtils {
double sum = 0.0;
double maxValue = Utils.findMaxEntry(log10p);
if(maxValue == Double.NEGATIVE_INFINITY)
return sum;
for (int i = start; i < finish; i++) {
sum += Math.pow(10.0, log10p[i] - maxValue);
}
@ -1508,6 +1511,24 @@ public class MathUtils {
return result;
}
/**
 * Element-wise sum of two vectors, using unboxed primitives for efficiency
 * (primitive counterpart of the generic boxed vectorSum routine).
 *
 * @param x first addend vector
 * @param y second addend vector; must have the same length as x
 * @return a new vector z of the same length as x and y with z[k] = x[k] + y[k]
 * @throws ReviewedStingException if x and y differ in length
 */
public static double[] vectorSum(final double[] x, final double[] y) {
    if (x.length != y.length)
        throw new ReviewedStingException("BUG: Lengths of x and y must be the same");

    final double[] result = new double[x.length];
    for (int k = 0; k < x.length; k++)
        result[k] = x[k] + y[k];

    return result;
}
public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {
Double result[] = new Double[v1.length];

View File

@ -703,4 +703,11 @@ public class Utils {
return programRecord;
}
/**
 * Materializes an Iterable into a Collection by copying every element, in
 * iteration order, into a new ArrayList.
 *
 * @param iter the elements to collect; iterated exactly once
 * @param <E>  element type
 * @return a new mutable Collection containing iter's elements
 */
public static <E> Collection<E> makeCollection(Iterable<E> iter) {
    final Collection<E> collected = new ArrayList<E>();
    for ( final E element : iter )
        collected.add(element);
    return collected;
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
@ -18,7 +17,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
static final int numSamples = 3;
static double[][] priors = new double[2][2*numSamples+1]; // flat priors
static double[] priors = new double[2*numSamples+1]; // flat priors
@BeforeSuite
public void before() {
@ -83,26 +82,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
@Test(dataProvider = "getGLs")
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2, 2*numSamples);
for ( int i = 0; i < 2; i++ ) {
for ( int j = 0; j < 2*numSamples+1; j++ ) {
result.log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
result.log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
}
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result, false);
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]);
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[allele];
if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) {
Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero);
} else {
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
}

View File

@ -17,8 +17,8 @@ import java.util.Map;
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
// --------------------------------------------------------------------------------------------------------------
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232"));
Arrays.asList("ac3737b4212f634a03c640c83f670955"));
executeTest("test MultiSample Pilot1", spec);
}
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
Arrays.asList("6f70dfbaf3bb70c702f9e9dbacd67c17"));
executeTest("test Multiple SNP alleles", spec);
}
@ -138,7 +138,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("6172d2f3d370132f4c57a26aa94c256e"));
Arrays.asList("e9d23a08472e4e27b4f25e844f5bad57"));
executeTest("test SLOD", spec);
}
@ -146,8 +146,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "553f6b4cbf380885bec9dd634cf68742" );
e.put( "--output_mode EMIT_ALL_SITES", "6d8624e45ad9dae5803ac705b39e4ffa" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "ecf92054c1e4bd9d6529b8002d385165" );
e.put( "--output_mode EMIT_ALL_SITES", "119c9fcefbc69e0fc10b1dc52f6438e3" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -300,13 +300,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("52340d578a708fa709b69ce48987bc9d"));
Arrays.asList("fbc48d7d9e622c9af7922f91bc858151"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("9566c7abef5ee5829a516d90445b347f"));
Arrays.asList("94c52ef70e44709ccd947d32e9c27da9"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -335,4 +335,37 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
UserException.class);
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing minIndelFraction
//
// --------------------------------------------------------------------------------------------------------------
// Shared command line for the -minIndelFrac tests below: b37 indel calling over a
// small interval of 978604.bam, restricted (via -rf Sample / -goodSM) to five samples.
final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
        + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";

// -minIndelFrac 0.0 -- presumably no minimum-indel-fraction filtering; the MD5
// pins the expected call set for regression detection.
@Test
public void testMinIndelFraction0() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 0.0", 1,
            Arrays.asList("f08ff07ad49d388198c1887baad05977"));
    executeTest("test minIndelFraction 0.0", spec);
}

// Same command at threshold 0.25; expected output differs from the 0.0 case.
@Test
public void testMinIndelFraction25() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 0.25", 1,
            Arrays.asList("a0945fd21369aaf68c7f1d96dbb930d1"));
    executeTest("test minIndelFraction 0.25", spec);
}

// Same command at the maximal threshold of 1.
@Test
public void testMinIndelFraction100() {
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
            assessMinIndelFraction + " -minIndelFrac 1", 1,
            Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
    executeTest("test minIndelFraction 1.0", spec);
}
}

View File

@ -0,0 +1,90 @@
package org.broadinstitute.sting.gatk.walkers.validation;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration tests for the ValidationSiteSelector walker, covering the
 * available sample-selection and frequency-selection modes.
 *
 * @author delangel
 * @since 3/26/12
 */
public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
    // Common command line: select 100 validation sites on chromosome 1 of b36,
    // with the caller-supplied selection-mode arguments appended.
    public static String baseTestString(String args) {
        return "-T ValidationSiteSelector -R " + b36KGReference + " -L 1 -o %s -NO_HEADER -numSites 100 " + args;
    }

    // Input VCF and the sample subset file shared by all tests below.
    private static String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
    private static String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
    private static String samplePrefix = " -sf " + samplesFile;

    // Frequency-selection argument fragments: uniform sampling vs. preserving
    // the input allele-frequency spectrum.
    private static String freqUnif = " --frequencySelectionMode UNIFORM ";
    private static String freqAF = " --frequencySelectionMode KEEP_AF_SPECTRUM ";

    // Sample-selection argument fragments: no sample-based selection, or
    // polymorphism determined from genotypes (GT) or genotype likelihoods (GL).
    private static String sampleNone = " -sampleMode NONE ";
    private static String sampleGT = samplePrefix+" -sampleMode POLY_BASED_ON_GT ";
    private static String sampleGL = samplePrefix+" -sampleMode POLY_BASED_ON_GL -samplePNonref 0.95";

    // Each test pins its expected output with an MD5 checksum for regression detection.

    @Test(enabled=true)
    public void testNoSampleSelectionFreqUniform() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleNone + freqUnif + "--variant " + testfile),
                1,
                Arrays.asList("d49baeb8000a426c172ce1d81eb37963")
        );
        executeTest("testNoSampleSelectionFreqUniform--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testNoSampleSelectionFreqAF() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleNone + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("0fb0d015d462c34514fc7e96beea5f56")
        );
        executeTest("testNoSampleSelectionFreqAF--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGTFreqUniform() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGT + freqUnif + "--variant " + testfile),
                1,
                Arrays.asList("0672854299d42ea8af906976a3849ae6")
        );
        executeTest("testPolyGTFreqUniform--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGTFreqAF() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGT + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("5bdffda1a063d0bddd6b236854ec627d")
        );
        executeTest("testPolyGTFreqAF--" + testfile, spec);
    }

    @Test(enabled=true)
    public void testPolyGLFreqAF() {
        WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(sampleGL + freqAF + "--variant " + testfile),
                1,
                Arrays.asList("35ef16aa41303606a4b94f7b88bd9aa8")
        );
        executeTest("testPolyGLFreqAF--" + testfile, spec);
    }
}