Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
commit e8bb8ade1a
@@ -26,14 +26,12 @@ package org.broadinstitute.sting.gatk.report;
 
 import org.apache.commons.lang.math.NumberUtils;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.TreeMap;
+import java.util.*;
 
 /**
  * Holds values for a column in a GATK report table
  */
-public class GATKReportColumn extends TreeMap<Object, Object> {
+public class GATKReportColumn extends LinkedHashMap<Object, Object> {
     final private String columnName;
     final private Object defaultValue;
     final private String format;
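
The hunk above swaps the GATKReportColumn base class from TreeMap to LinkedHashMap: a TreeMap re-sorts rows by key, while a LinkedHashMap hands them back in insertion order, so report rows stay in the order they were written. A minimal standalone sketch of the difference (illustrative only, not GATK code; the class name is made up):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.TreeMap;

    public class ColumnOrderDemo {
        public static void main(String[] args) {
            Map<Object, Object> sorted = new TreeMap<Object, Object>();          // old base class
            Map<Object, Object> insertion = new LinkedHashMap<Object, Object>(); // new base class
            for (String key : new String[]{"z", "a", "m"}) {
                sorted.put(key, key);
                insertion.put(key, key);
            }
            System.out.println(sorted.keySet());    // [a, m, z] -- re-sorted by key
            System.out.println(insertion.keySet()); // [z, a, m] -- insertion order preserved
        }
    }
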
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.*;
+
+/**
+ * Code for determining which indels are segregating among the samples.
+ *
+ * This code is just a refactor of the original code from Guillermo in the UG.
+ *
+ * @author Mark DePristo
+ * @since 3/26/12
+ */
+public class ConsensusAlleleCounter {
+    final protected static Logger logger = Logger.getLogger(ConsensusAlleleCounter.class);
+    private final int minIndelCountForGenotyping;
+    private final boolean doMultiAllelicCalls;
+    private final double minFractionInOneSample;
+    private final GenomeLocParser locParser;
+
+    public ConsensusAlleleCounter(final GenomeLocParser locParser,
+                                  final boolean doMultiAllelicCalls,
+                                  final int minIndelCountForGenotyping,
+                                  final double minFractionInOneSample) {
+        this.minIndelCountForGenotyping = minIndelCountForGenotyping;
+        this.doMultiAllelicCalls = doMultiAllelicCalls;
+        this.minFractionInOneSample = minFractionInOneSample;
+        this.locParser = locParser;
+    }
+
+    /**
+     * Returns a list of Alleles at this locus that may be segregating
+     *
+     * @param ref
+     * @param contexts
+     * @param contextType
+     * @return
+     */
+    public List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                Map<String, AlignmentContext> contexts,
+                                                AlignmentContextUtils.ReadOrientation contextType) {
+        final Map<String, Integer> consensusIndelStrings = countConsensusAlleles(ref, contexts, contextType);
+//        logger.info("Alleles at " + ref.getLocus());
+//        for ( Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet() ) {
+//            logger.info("  " + elt.getValue() + " => " + elt.getKey());
+//        }
+        return consensusCountsToAlleles(ref, consensusIndelStrings);
+    }
+
+    //
+    // TODO -- WARNING DOESN'T WORK WITH REDUCED READS
+    //
+    private Map<String, Integer> countConsensusAlleles(ReferenceContext ref,
+                                                       Map<String, AlignmentContext> contexts,
+                                                       AlignmentContextUtils.ReadOrientation contextType) {
+        final GenomeLoc loc = ref.getLocus();
+        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
+
+        int insCount = 0, delCount = 0;
+        // quick check of total number of indels in pileup
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+            insCount += indelPileup.getNumberOfInsertions();
+            delCount += indelPileup.getNumberOfDeletions();
+        }
+
+        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
+            return Collections.emptyMap();
+
+        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
+            // todo -- warning, can be duplicating expensive partition here
+            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
+
+            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
+
+            final int nIndelReads = indelPileup.getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
+            final int nReadsOverall = indelPileup.getNumberOfElements();
+            if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
+//                if ( nIndelReads > 0 )
+//                    logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+                continue;
+//            } else {
+//                logger.info("### Keeping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
+            }
+
+            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
+                final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+                if (read == null)
+                    continue;
+                if (ReadUtils.is454Read(read)) {
+                    continue;
+                }
+
+                /* if (DEBUG && p.isIndel()) {
+                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
+                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
+                            p.getEventLength(),p.getType().toString(), p.getEventBases());
+                }
+                */
+                String indelString = p.getEventBases();
+                if (p.isInsertion()) {
+                    boolean foundKey = false;
+                    // copy of hashmap into temp arrayList
+                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
+                    for (String s : consensusIndelStrings.keySet()) {
+                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
+                    }
+
+                    if (read.getAlignmentEnd() == loc.getStart()) {
+                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
+                        // In this case, the read could have any of the inserted bases and we need to build a consensus
+
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            // case 1: current insertion is prefix of indel in hash map
+                            if (s.startsWith(indelString)) {
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.startsWith(s)) {
+                                // case 2: indel stored in hash table is prefix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else if (read.getAlignmentStart() == loc.getStart()+1) {
+                        // opposite corner condition: read will start at current locus with an insertion
+                        for (int k=0; k < cList.size(); k++) {
+                            String s = cList.get(k).getFirst();
+                            int cnt = cList.get(k).getSecond();
+                            if (s.endsWith(indelString)) {
+                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
+                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
+                                foundKey = true;
+                            }
+                            else if (indelString.endsWith(s)) {
+                                // case 2: indel stored in hash table is suffix of current insertion
+                                // In this case, new bases are new key.
+                                foundKey = true;
+                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
+                            }
+                        }
+                        if (!foundKey)
+                            // none of the above: event bases not supported by previous table, so add new key
+                            cList.add(new Pair<String, Integer>(indelString,1));
+
+                    }
+                    else {
+                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
+                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
+                    }
+
+                    // copy back arrayList into hashMap
+                    consensusIndelStrings.clear();
+                    for (Pair<String,Integer> pair : cList) {
+                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
+                    }
+
+                }
+                else if (p.isDeletion()) {
+                    indelString = String.format("D%d",p.getEventLength());
+                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
+                    consensusIndelStrings.put(indelString,cnt+1);
+                }
+            }
+        }
+
+        return consensusIndelStrings;
+    }
+
+    private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
+                                                  final Map<String, Integer> consensusIndelStrings) {
+        final GenomeLoc loc = ref.getLocus();
+        final Collection<VariantContext> vcs = new ArrayList<VariantContext>();
+        int maxAlleleCnt = 0;
+        Allele refAllele, altAllele;
+
+        for (final Map.Entry<String, Integer> elt : consensusIndelStrings.entrySet()) {
+            final String s = elt.getKey();
+            final int curCnt = elt.getValue();
+            int stop = 0;
+
+            // if observed count is above minimum threshold, we will genotype this allele
+            if (curCnt < minIndelCountForGenotyping)
+                continue;
+
+            if (s.startsWith("D")) {
+                // get deletion length
+                final int dLen = Integer.valueOf(s.substring(1));
+                // get ref bases of accurate deletion
+                final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
+                stop = loc.getStart() + dLen;
+                final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
+
+                if (Allele.acceptableAlleleBases(refBases)) {
+                    refAllele = Allele.create(refBases, true);
+                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+                }
+                else continue; // don't go on with this allele if refBases are non-standard
+            } else {
+                // insertion case
+                if (Allele.acceptableAlleleBases(s)) {
+                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+                    altAllele = Allele.create(s, false);
+                    stop = loc.getStart();
+                }
+                else continue; // go on to next allele if consensus insertion has any non-standard base.
+            }
+
+            final VariantContextBuilder builder = new VariantContextBuilder().source("");
+            builder.loc(loc.getContig(), loc.getStart(), stop);
+            builder.alleles(Arrays.asList(refAllele, altAllele));
+            builder.referenceBaseForIndel(ref.getBase());
+            builder.noGenotypes();
+            if (doMultiAllelicCalls)
+                vcs.add(builder.make());
+            else {
+                if (curCnt > maxAlleleCnt) {
+                    maxAlleleCnt = curCnt;
+                    vcs.clear();
+                    vcs.add(builder.make());
+                }
+            }
+        }
+
+        if (vcs.isEmpty())
+            return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
+
+        final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+        return mergedVC.getAlleles();
+    }
+}
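
The insertion-counting loop in the new class has to reconcile partial observations: a read whose alignment ends (or starts) exactly at the event site may carry only a prefix (or suffix) of the true insertion, so its event bases are credited to a compatible, previously seen key rather than opening a new one. A simplified standalone sketch of the prefix rule (illustrative only, not GATK code; unlike the loop above it stops at the first compatible key):

    import java.util.HashMap;
    import java.util.Map;

    public class ConsensusMergeDemo {
        // Merge one observed insertion string into the running consensus counts.
        static void addInsertion(Map<String, Integer> counts, String eventBases) {
            for (Map.Entry<String, Integer> entry : new HashMap<String, Integer>(counts).entrySet()) {
                String seen = entry.getKey();
                if (seen.startsWith(eventBases)) {          // case 1: new event is a prefix of a known insertion
                    counts.put(seen, entry.getValue() + 1);
                    return;
                } else if (eventBases.startsWith(seen)) {   // case 2: known insertion is a prefix; longer bases become the key
                    counts.remove(seen);
                    counts.put(eventBases, entry.getValue() + 1);
                    return;
                }
            }
            counts.put(eventBases, 1);                      // neither: start a new consensus key
        }

        public static void main(String[] args) {
            Map<String, Integer> counts = new HashMap<String, Integer>();
            addInsertion(counts, "AC");
            addInsertion(counts, "ACGT"); // extends AC, so the key becomes ACGT with count 2
            addInsertion(counts, "A");    // prefix of ACGT, count 3
            System.out.println(counts);   // {ACGT=3}
        }
    }
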
@@ -52,11 +52,9 @@ import java.util.*;
 
 public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
     private final int HAPLOTYPE_SIZE;
 
-    private final int minIndelCountForGenotyping;
     private final boolean getAlleleListFromVCF;
 
     private boolean DEBUG = false;
-    private final boolean doMultiAllelicCalls = true;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
     private PairHMMIndelErrorModel pairModel;
@@ -72,7 +70,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
     // gdebug removeme
     // todo -cleanup
     private GenomeLoc lastSiteVisited;
-    private ArrayList<Allele> alleleList;
+    private List<Allele> alleleList = new ArrayList<Allele>();
 
     static {
         indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
@@ -83,205 +81,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
         super(UAC, logger);
         pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
                 UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
-        alleleList = new ArrayList<Allele>();
         getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
-        minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
         HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
         DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
 
         haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
         ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
     }
 
-    private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
-                                                      Map<String, AlignmentContext> contexts,
-                                                      AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
-        Allele refAllele = null, altAllele = null;
-        GenomeLoc loc = ref.getLocus();
-        ArrayList<Allele> aList = new ArrayList<Allele>();
-
-        HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();
-
-        int insCount = 0, delCount = 0;
-        // quick check of total number of indels in pileup
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-            insCount += indelPileup.getNumberOfInsertions();
-            delCount += indelPileup.getNumberOfDeletions();
-        }
-
-        if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
-            return aList;
-
-        for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
-            // todo -- warning, can be duplicating expensive partition here
-            AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
-
-            final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
-
-            for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
-                //SAMRecord read = p.getRead();
-                GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
-                if (read == null)
-                    continue;
-                if (ReadUtils.is454Read(read)) {
-                    continue;
-                }
-
-                /* if (DEBUG && p.isIndel()) {
-                    System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n",
-                            read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(),
-                            p.getEventLength(),p.getType().toString(), p.getEventBases());
-                }
-                */
-
-                String indelString = p.getEventBases();
-                if (p.isInsertion()) {
-                    boolean foundKey = false;
-                    // copy of hashmap into temp arrayList
-                    ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
-                    for (String s : consensusIndelStrings.keySet()) {
-                        cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
-                    }
-
-                    if (read.getAlignmentEnd() == loc.getStart()) {
-                        // first corner condition: a read has an insertion at the end, and we're right at the insertion.
-                        // In this case, the read could have any of the inserted bases and we need to build a consensus
-
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            // case 1: current insertion is prefix of indel in hash map
-                            if (s.startsWith(indelString)) {
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.startsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else if (read.getAlignmentStart() == loc.getStart()+1) {
-                        // opposite corner condition: read will start at current locus with an insertion
-                        for (int k=0; k < cList.size(); k++) {
-                            String s = cList.get(k).getFirst();
-                            int cnt = cList.get(k).getSecond();
-                            if (s.endsWith(indelString)) {
-                                // case 1: current insertion (indelString) is suffix of indel in hash map (s)
-                                cList.set(k,new Pair<String, Integer>(s,cnt+1));
-                                foundKey = true;
-                            }
-                            else if (indelString.endsWith(s)) {
-                                // case 2: indel stored in hash table is prefix of current insertion
-                                // In this case, new bases are new key.
-                                foundKey = true;
-                                cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
-                            }
-                        }
-                        if (!foundKey)
-                            // none of the above: event bases not supported by previous table, so add new key
-                            cList.add(new Pair<String, Integer>(indelString,1));
-
-                    }
-                    else {
-                        // normal case: insertion somewhere in the middle of a read: add count to arrayList
-                        int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                        cList.add(new Pair<String, Integer>(indelString,cnt+1));
-                    }
-
-                    // copy back arrayList into hashMap
-                    consensusIndelStrings.clear();
-                    for (Pair<String,Integer> pair : cList) {
-                        consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
-                    }
-
-                }
-                else if (p.isDeletion()) {
-                    indelString = String.format("D%d",p.getEventLength());
-                    int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
-                    consensusIndelStrings.put(indelString,cnt+1);
-                }
-            }
-        }
-
-        Collection<VariantContext> vcs = new ArrayList<VariantContext>();
-        int maxAlleleCnt = 0;
-        String bestAltAllele = "";
-
-        for (String s : consensusIndelStrings.keySet()) {
-            int curCnt = consensusIndelStrings.get(s), stop = 0;
-            // if observed count if above minimum threshold, we will genotype this allele
-            if (curCnt < minIndelCountForGenotyping)
-                continue;
-
-            if (s.startsWith("D")) {
-                // get deletion length
-                int dLen = Integer.valueOf(s.substring(1));
-                // get ref bases of accurate deletion
-                int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
-                stop = loc.getStart() + dLen;
-                byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
-
-                if (Allele.acceptableAlleleBases(refBases)) {
-                    refAllele = Allele.create(refBases, true);
-                    altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
-                }
-                else continue; // don't go on with this allele if refBases are non-standard
-            } else {
-                // insertion case
-                if (Allele.acceptableAlleleBases(s)) {
-                    refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-                    altAllele = Allele.create(s, false);
-                    stop = loc.getStart();
-                }
-                else continue; // go on to next allele if consensus insertion has any non-standard base.
-            }
-
-            ArrayList vcAlleles = new ArrayList<Allele>();
-            vcAlleles.add(refAllele);
-            vcAlleles.add(altAllele);
-
-            final VariantContextBuilder builder = new VariantContextBuilder().source("");
-            builder.loc(loc.getContig(), loc.getStart(), stop);
-            builder.alleles(vcAlleles);
-            builder.referenceBaseForIndel(ref.getBase());
-            builder.noGenotypes();
-            if (doMultiAllelicCalls)
-                vcs.add(builder.make());
-            else {
-                if (curCnt > maxAlleleCnt) {
-                    maxAlleleCnt = curCnt;
-                    vcs.clear();
-                    vcs.add(builder.make());
-                }
-            }
-        }
-
-        if (vcs.isEmpty())
-            return aList; // nothing else to do, no alleles passed minimum count criterion
-
-        VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
-
-        aList = new ArrayList<Allele>(mergedVC.getAlleles());
-
-        return aList;
-    }
+    private List<Allele> computeConsensusAlleles(ReferenceContext ref,
+                                                 Map<String, AlignmentContext> contexts,
+                                                 AlignmentContextUtils.ReadOrientation contextType,
+                                                 GenomeLocParser locParser) {
+        ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
+        return counter.computeConsensusAlleles(ref, contexts, contextType);
+    }
 
     private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
@@ -118,6 +118,17 @@ public class UnifiedArgumentCollection {
     @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
     public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
+
+    /**
+     * Complementary argument to minIndelCnt. Only samples with at least this fraction of indel-containing reads will
+     * contribute to counting toward and overcoming the minIndelCnt threshold. This parameter ensures that in deep data
+     * you don't end up summing lots of super-rare errors to overcome the 5-read default threshold. It should work
+     * equally well for low-coverage and high-coverage samples, as low-coverage samples with any indel-containing reads
+     * should easily overcome this threshold.
+     */
+    @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false)
+    public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25;
 
     /**
     * This argument informs the prior probability of having an indel at a site.
     */
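
A worked example of the interaction between minIndelCnt and minIndelFrac described in the javadoc above, using made-up read counts and the default thresholds (illustrative only, not GATK code):

    public class MinIndelFractionDemo {
        public static void main(String[] args) {
            double minIndelFrac = 0.25; // MIN_INDEL_FRACTION_PER_SAMPLE default
            int minIndelCnt = 5;        // MIN_INDEL_COUNT_FOR_GENOTYPING default
            int[][] samples = { {2, 8}, {1, 100}, {3, 10} }; // {indelReads, totalReads} per sample

            int contributing = 0;
            for (int[] s : samples) {
                double fraction = s[0] / (double) s[1];
                // mirrors the gate in ConsensusAlleleCounter: skip samples with no
                // indel reads or a per-sample indel fraction below the threshold
                if (s[0] > 0 && fraction >= minIndelFrac)
                    contributing += s[0]; // the deep sample with 1/100 error reads is excluded
            }
            System.out.println("counted indel reads = " + contributing
                    + ", trigger genotyping? " + (contributing >= minIndelCnt));
            // counted indel reads = 5, trigger genotyping? true
        }
    }
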
@@ -165,6 +176,7 @@ public class UnifiedArgumentCollection {
         uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
         uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION;
         uac.MIN_INDEL_COUNT_FOR_GENOTYPING = MIN_INDEL_COUNT_FOR_GENOTYPING;
+        uac.MIN_INDEL_FRACTION_PER_SAMPLE = MIN_INDEL_FRACTION_PER_SAMPLE;
         uac.INDEL_HETEROZYGOSITY = INDEL_HETEROZYGOSITY;
         uac.INDEL_GAP_OPEN_PENALTY = INDEL_GAP_OPEN_PENALTY;
         uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
@@ -482,11 +482,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
     public void onTraversalDone(Integer result) {
         logger.info("Finalizing variant report");
 
-        for ( StateKey stateKey : evaluationContexts.keySet() ) {
-            NewEvaluationContext nec = evaluationContexts.get(stateKey);
+        for ( Map.Entry<StateKey, NewEvaluationContext> ecElt : evaluationContexts.entrySet() ) {
+            final StateKey stateKey = ecElt.getKey();
+            final NewEvaluationContext nec = ecElt.getValue();
 
             for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
                 ve.finalizeEvaluation();
+                final String veName = ve.getSimpleName(); // ve.getClass().getSimpleName();
 
                 AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
                 Map<Field, DataPoint> datamap = scanner.getData();
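
The loop rewrite above is the standard entrySet() idiom: iterating keySet() and calling get() per key pays for a second map lookup on every iteration, while entrySet() yields the key and value together. A minimal illustration (not GATK code):

    import java.util.Map;
    import java.util.TreeMap;

    public class EntrySetDemo {
        public static void main(String[] args) {
            Map<String, Integer> counts = new TreeMap<String, Integer>();
            counts.put("a", 1);
            counts.put("b", 2);

            for (String key : counts.keySet()) {                      // old pattern: extra lookup per key
                System.out.println(key + "=" + counts.get(key));
            }
            for (Map.Entry<String, Integer> e : counts.entrySet()) {  // new pattern: one pass, no re-lookup
                System.out.println(e.getKey() + "=" + e.getValue());
            }
        }
    }
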
@@ -498,7 +500,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
                     if (field.get(ve) instanceof TableType) {
                         TableType t = (TableType) field.get(ve);
 
-                        final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
+                        final String subTableName = veName + "." + field.getName();
                         final DataPoint dataPointAnn = datamap.get(field);
 
                         GATKReportTable table;
@@ -539,11 +541,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
                         }
                     }
                 } else {
-                    GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
+                    GATKReportTable table = report.getTable(veName);
 
                     for ( VariantStratifier vs : stratificationObjects ) {
-                        String columnName = vs.getName();
-
+                        final String columnName = vs.getName();
                         table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
                     }
 
@@ -8,6 +8,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 public abstract class VariantEvaluator {
     private VariantEvalWalker walker;
+    private final String simpleName;
+
+    protected VariantEvaluator() {
+        this.simpleName = getClass().getSimpleName();
+    }
 
     public void initialize(VariantEvalWalker walker) {
         this.walker = walker;
@@ -90,4 +95,8 @@ public abstract class VariantEvaluator {
     protected static String formattedRatio(final int num, final int denom) {
         return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
     }
+
+    public String getSimpleName() {
+        return simpleName;
+    }
 }
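
These two hunks cache getClass().getSimpleName() once at construction, so the hot loops in onTraversalDone read a plain field instead of re-deriving the name through reflection on every call. A minimal illustration of the pattern (not GATK code; the class names are made up):

    public class SimpleNameDemo {
        static abstract class NamedModule {
            private final String simpleName;

            protected NamedModule() {
                this.simpleName = getClass().getSimpleName(); // computed exactly once
            }

            public String getSimpleName() {
                return simpleName; // plain field read on every later call
            }
        }

        static class CountVariants extends NamedModule {}

        public static void main(String[] args) {
            System.out.println(new CountVariants().getSimpleName()); // CountVariants
        }
    }
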
@@ -27,6 +27,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Field;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
@@ -40,6 +41,7 @@ import java.util.Map;
  * the object, a Marshalling object can serialize or deserialize an analysis module.
  */
 public class AnalysisModuleScanner {
+    final private static Map<String, Annotation[]> annotationCache = new HashMap<String, Annotation[]>();
 
     // what we extracted from the class
     private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
@@ -84,12 +86,22 @@ public class AnalysisModuleScanner {
         // get the fields from the class, and extract
         for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
             for (Field f : superCls.getDeclaredFields())
-                for (Annotation annotation : f.getAnnotations()) {
+                for (Annotation annotation : getAnnotations(f)) {
                     if (annotation.annotationType().equals(DataPoint.class))
                         datums.put(f,(DataPoint) annotation);
                 }
         }
     }
 
+    private Annotation[] getAnnotations(final Field field) {
+        final String fieldName = field.toString();
+        Annotation[] annotations = annotationCache.get(fieldName);
+        if ( annotations == null ) {
+            annotations = field.getAnnotations();
+            annotationCache.put(fieldName, annotations);
+        }
+        return annotations;
+    }
+
     /**
      *
@@ -36,6 +36,16 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
         return new TreeMap<String, VariantEvaluator>(evaluationInstances);
     }
 
+    public StateKey makeStateKey() {
+        Map<String, String> map = new HashMap<String, String>(size());
+
+        for (Map.Entry<VariantStratifier, String> elt : this.entrySet() ) {
+            map.put(elt.getKey().getName(), elt.getValue());
+        }
+
+        return new StateKey(map);
+    }
+
     public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
         for ( final VariantEvaluator evaluation : evaluationInstances.values() ) {
             // the other updateN methods don't see a null context
@@ -3,25 +3,67 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
 import java.util.Map;
 import java.util.TreeMap;
 
-public class StateKey extends TreeMap<String, String> {
-//    public int hashCode() {
-//        int hashCode = 1;
-//
-//        for (final Map.Entry<String,String> pair : this.entrySet()) {
-//            hashCode *= pair.getKey().hashCode() + pair.getValue().hashCode();
-//        }
-//
-//        return hashCode;
-//    }
-
-    public String toString() {
-        String value = "";
-
-        for ( final String key : this.keySet() ) {
-            //value += "\tstate " + key + ":" + this.get(key) + "\n";
-            value += String.format("%s:%s;", key, this.get(key));
-        }
-
-        return value;
-    }
-}
+/**
+ * A final constant class representing the specific state configuration
+ * for a VariantEvaluator instance.
+ *
+ * TODO optimizations to entirely remove the TreeMap and just store the HashMap for performance and use the tree for the sorted toString function.
+ */
+public final class StateKey {
+    /** High-performance cache of the toString operation for a constant class */
+    private final String string;
+    private final TreeMap<String, String> states;
+
+    public StateKey(final Map<String, String> states) {
+        this.states = new TreeMap<String, String>(states);
+        this.string = formatString();
+    }
+
+    public StateKey(final StateKey toOverride, final String keyOverride, final String valueOverride) {
+        if ( toOverride == null ) {
+            this.states = new TreeMap<String, String>();
+        } else {
+            this.states = new TreeMap<String, String>(toOverride.states);
+        }
+
+        this.states.put(keyOverride, valueOverride);
+        this.string = formatString();
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        final StateKey stateKey = (StateKey) o;
+
+        if (states != null ? !states.equals(stateKey.states) : stateKey.states != null) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return states.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return string;
+    }
+
+    private final String formatString() {
+        StringBuilder b = new StringBuilder();
+
+        for ( Map.Entry<String, String> entry : states.entrySet() ) {
+            b.append(String.format("%s:%s;", entry.getKey(), entry.getValue()));
+        }
+
+        return b.toString();
+    }
+
+    // TODO -- might be slow because of tree map
+    public String get(final String key) {
+        return states.get(key);
+    }
+}
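
A usage sketch for the rewritten StateKey above (assumes the class from this hunk is on the classpath; the stratifier names are made up). Keys are now immutable, toString() is precomputed, and the copy-with-override constructor derives a new key without mutating the original, which is what initializeStateKeys relies on in the next file:

    import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey;

    import java.util.HashMap;
    import java.util.Map;

    public class StateKeyDemo {
        public static void main(String[] args) {
            Map<String, String> states = new HashMap<String, String>();
            states.put("Novelty", "known");
            states.put("Filter", "called");

            StateKey base = new StateKey(states);
            StateKey derived = new StateKey(base, "Novelty", "novel"); // base is untouched

            System.out.println(base);    // Filter:called;Novelty:known;  (TreeMap keeps keys sorted)
            System.out.println(derived); // Filter:called;Novelty:novel;
        }
    }
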
@@ -214,20 +214,9 @@ public class VariantEvalUtils {
                 ecs.putAll(initializeEvaluationContexts(stratificationObjects, evaluationObjects, newStratStack, nec));
             }
         } else {
-            HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
-
-            StateKey stateKey = new StateKey();
-            for (VariantStratifier vs : ec.keySet()) {
-                String state = ec.get(vs);
-
-                stateKey.put(vs.getName(), state);
-            }
-
+            final StateKey stateKey = ec.makeStateKey();
             ec.addEvaluationClassList(variantEvalWalker, stateKey, evaluationObjects);
-
-            necs.put(stateKey, ec);
-
-            return necs;
+            return new HashMap<StateKey, NewEvaluationContext>(Collections.singletonMap(stateKey, ec));
         }
 
         return ecs;
@@ -428,14 +417,8 @@ public class VariantEvalUtils {
             HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
             VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
 
-            for (String state : oneSetOfStates.get(vs)) {
-                StateKey newStateKey = new StateKey();
-                if (stateKey != null) {
-                    newStateKey.putAll(stateKey);
-                }
-
-                newStateKey.put(vs.getName(), state);
-
+            for (final String state : oneSetOfStates.get(vs)) {
+                final StateKey newStateKey = new StateKey(stateKey, vs.getName(), state);
                 initializeStateKeys(stateMap, newStateStack, newStateKey, stateKeys);
             }
         } else {
@@ -17,8 +17,8 @@ import java.util.Map;
 
 public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
-    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
-    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129;
+    private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
+    private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
     private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
 
     // --------------------------------------------------------------------------------------------------------------
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
-                Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
+                Arrays.asList("849ee8b21b4bbb02dfc7867a4f1bc14b"));
         executeTest("test Multiple SNP alleles", spec);
     }
 
@@ -335,4 +335,37 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                 UserException.class);
         executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
     }
+
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // testing minIndelFraction
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation
+            + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030";
+
+    @Test
+    public void testMinIndelFraction0() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 0.0", 1,
+                Arrays.asList("f08ff07ad49d388198c1887baad05977"));
+        executeTest("test minIndelFraction 0.0", spec);
+    }
+
+    @Test
+    public void testMinIndelFraction25() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 0.25", 1,
+                Arrays.asList("a0945fd21369aaf68c7f1d96dbb930d1"));
+        executeTest("test minIndelFraction 0.25", spec);
+    }
+
+    @Test
+    public void testMinIndelFraction100() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                assessMinIndelFraction + " -minIndelFrac 1", 1,
+                Arrays.asList("50fe9a4c5633f6395b45d9ec1e00d56a"));
+        executeTest("test minIndelFraction 1.0", spec);
+    }
 }