Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-07-17 15:12:28 -04:00 · 2012-07-17 15:12:28 -04:00 · b0d99fd10d
parent 305db8c0d1 bf2d5efe4d
commit b0d99fd10d
18 changed files with 3409 additions and 5 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnEdge.java
@ -0,0 +1,60 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import org.jgrapht.graph.DefaultDirectedGraph;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks
+ * Date: Mar 23, 2011
+ */
+
+// simple edge class for connecting nodes in the graph
+public class DeBruijnEdge implements Comparable<DeBruijnEdge> {
+
+    private int multiplicity;
+    private boolean isRef;
+
+    public DeBruijnEdge() {
+        multiplicity = 1;
+        isRef = false;
+    }
+
+    public DeBruijnEdge( final boolean isRef ) {
+        multiplicity = 1;
+        this.isRef = isRef;
+    }
+
+    public DeBruijnEdge( final boolean isRef, final int multiplicity ) {
+        this.multiplicity = multiplicity;
+        this.isRef = isRef;
+    }
+
+    public int getMultiplicity() {
+        return multiplicity;
+    }
+
+    public void setMultiplicity( final int value ) {
+        multiplicity = value;
+    }
+
+    public boolean getIsRef() {
+        return isRef;
+    }
+
+    public void setIsRef( final boolean isRef ) {
+        this.isRef = isRef;
+    }
+
+    public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
+        return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge)));
+    }
+
+    public boolean equals( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph2 ) {
+        return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
+    }
+
+    @Override
+    public int compareTo( final DeBruijnEdge that ) {
+        return this.multiplicity - that.multiplicity;
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java
@ -0,0 +1,46 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import java.util.Arrays;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks
+ * Date: Mar 23, 2011
+ */
+// simple node class for storing kmer sequences
+public class DeBruijnVertex {
+
+    protected final byte[] sequence;
+    public final int kmer;
+
+    public DeBruijnVertex( final byte[] sequence, final int kmer ) {
+        this.sequence = sequence;
+        this.kmer = kmer;
+    }
+
+    @Override
+    public boolean equals( Object v ) {
+        return v instanceof DeBruijnVertex && Arrays.equals(sequence, ((DeBruijnVertex) v).sequence);
+    }
+
+    @Override
+    public int hashCode() { // necessary to override here so that graph.containsVertex() works the same way as vertex.equals() as one might expect
+        return Arrays.hashCode(sequence);
+    }
+
+    public String toString() {
+        return new String(sequence);
+    }   
+    
+    public String getSuffixString() {
+        return new String( getSuffix() );
+    }
+
+    public byte[] getSequence() {
+        return sequence;
+    }
+
+    public byte[] getSuffix() {
+        return Arrays.copyOfRange( sequence, kmer - 1, sequence.length );
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@ -0,0 +1,610 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.variantcontext.*;
+
+import java.util.*;
+
+public class GenotypingEngine {
+
+    private final boolean DEBUG;
+    private final int MNP_LOOK_AHEAD;
+    private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
+    private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
+    private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
+
+    public GenotypingEngine( final boolean DEBUG, final int MNP_LOOK_AHEAD, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
+        this.DEBUG = DEBUG;
+        this.MNP_LOOK_AHEAD = MNP_LOOK_AHEAD;
+        this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
+        noCall.add(Allele.NO_CALL);
+    }
+
+    // This function is the streamlined approach, currently not being used
+    @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
+    public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
+                                                                                                                             final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) {
+        // Prepare the list of haplotype indices to genotype
+        final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
+
+        for( final Haplotype h : haplotypes ) {
+            allelesToGenotype.add( Allele.create(h.getBases(), h.isReference()) );
+        }
+        final int numHaplotypes = haplotypes.size();
+
+        // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
+        final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
+        for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
+            final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
+            final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, sample);
+            int glIndex = 0;
+            for( int iii = 0; iii < numHaplotypes; iii++ ) {
+                for( int jjj = 0; jjj <= iii; jjj++ ) {
+                    genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
+                }
+            }
+            genotypes.add(new GenotypeBuilder(sample, noCall).PL(genotypeLikelihoods).make());
+        }
+        final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder().loc(activeRegionWindow).alleles(allelesToGenotype).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
+        if( call == null ) { return Collections.emptyList(); } // exact model says that the call confidence is below the specified confidence threshold so nothing to do here
+
+        // Prepare the list of haplotypes that need to be run through Smith-Waterman for output to VCF
+        final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
+        for( final Haplotype h : haplotypes ) {
+            if( call.getAllele(h.getBases()) == null ) { // exact model removed this allele from the list so no need to run SW and output to VCF
+                haplotypesToRemove.add(h);
+            }
+        }
+        haplotypes.removeAll(haplotypesToRemove);
+
+        if( OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
+            final List<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>> returnVCs = new ArrayList<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>>();
+            // set up the default 1-to-1 haplotype mapping object
+            final HashMap<Allele,ArrayList<Haplotype>> haplotypeMapping = new HashMap<Allele,ArrayList<Haplotype>>();
+            for( final Haplotype h : haplotypes ) {
+                final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
+                list.add(h);
+                haplotypeMapping.put(call.getAllele(h.getBases()), list);
+            }
+            returnVCs.add( new Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>(call,haplotypeMapping) );
+            return returnVCs;
+        }
+
+        final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
+
+        // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
+        final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
+        int count = 0;
+        if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
+        for( final Haplotype h : haplotypes ) {
+            if( DEBUG ) {
+                System.out.println( h.toString() );
+                System.out.println( "> Cigar = " + h.getCigar() );
+            }
+            // Walk along the alignment and turn any difference from the reference into an event
+            h.setEventMap( generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
+            startPosKeySet.addAll(h.getEventMap().keySet());
+        }
+        
+        // Create the VC merge priority list
+        final ArrayList<String> priorityList = new ArrayList<String>();
+        for( int iii = 0; iii < haplotypes.size(); iii++ ) {
+            priorityList.add("HC" + iii);
+        }
+        
+        // Walk along each position in the key set and create each event to be outputted
+        for( final int loc : startPosKeySet ) {
+            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
+                final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
+                for( final Haplotype h : haplotypes ) {
+                    final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
+                    final VariantContext vc = eventMap.get(loc);
+                    if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
+                        eventsAtThisLoc.add(vc);
+                    }
+                }
+                
+                // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
+                final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
+
+                // Merge the event to find a common reference representation
+                final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
+
+                final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
+                int aCount = 0;
+                for( final Allele a : mergedVC.getAlleles() ) {
+                    alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
+                }
+
+                if( DEBUG ) {
+                    System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
+                    //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
+                }
+
+                // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
+                final GenotypesContext myGenotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
+                for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
+                    final int myNumHaplotypes = alleleMapper.size();
+                    final double[] genotypeLikelihoods = new double[myNumHaplotypes * (myNumHaplotypes+1) / 2];
+                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
+                    int glIndex = 0;
+                    for( int iii = 0; iii < myNumHaplotypes; iii++ ) {
+                        for( int jjj = 0; jjj <= iii; jjj++ ) {
+                            genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
+                        }
+                    }
+
+                    // using the allele mapping object translate the haplotype allele into the event allele
+                    final Genotype g = new GenotypeBuilder(sample)
+                            .alleles(findEventAllelesInSample(mergedVC.getAlleles(), call.getAlleles(), call.getGenotype(sample).getAlleles(), alleleMapper, haplotypes))
+                            .phased(loc != startPosKeySet.first())
+                            .PL(genotypeLikelihoods).make();
+                    myGenotypes.add(g);
+                }
+                returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(
+                                 new VariantContextBuilder(mergedVC).log10PError(call.getLog10PError()).genotypes(myGenotypes).make(), alleleHashMap) );
+            }
+        }
+        return returnCalls;
+    }
+
+    @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
+    public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList<Haplotype> haplotypes, final byte[] ref, final GenomeLoc refLoc,
+                                                                                                                               final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList<VariantContext> activeAllelesToGenotype ) {
+
+        final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
+
+        // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
+        final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
+        int count = 0;
+        if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
+        for( final Haplotype h : haplotypes ) {
+            // Walk along the alignment and turn any difference from the reference into an event
+            h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++, MNP_LOOK_AHEAD ) );
+            if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
+            if( DEBUG ) {
+                System.out.println( h.toString() );
+                System.out.println( "> Cigar = " + h.getCigar() );
+                System.out.println( "> Left and right breaks = (" + h.leftBreakPoint + " , " + h.rightBreakPoint + ")");
+                System.out.println( ">> Events = " + h.getEventMap());
+            }
+        }
+        // Create the VC merge priority list
+        final ArrayList<String> priorityList = new ArrayList<String>();
+        for( int iii = 0; iii < haplotypes.size(); iii++ ) {
+            priorityList.add("HC" + iii);
+        }
+
+        cleanUpSymbolicUnassembledEvents( haplotypes, priorityList );
+        if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
+            mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
+        }
+        if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
+            for( final VariantContext compVC : activeAllelesToGenotype ) {
+                startPosKeySet.add( compVC.getStart() );
+            }
+        }
+
+
+        // Walk along each position in the key set and create each event to be outputted
+        for( final int loc : startPosKeySet ) {
+            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
+                final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
+                if( activeAllelesToGenotype.isEmpty() ) {
+                    for( final Haplotype h : haplotypes ) {
+                        final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
+                        final VariantContext vc = eventMap.get(loc);
+                        if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
+                            eventsAtThisLoc.add(vc);
+                        }
+                    }
+                } else { // we are in GGA mode!
+                    for( final VariantContext compVC : activeAllelesToGenotype ) {
+                        if( compVC.getStart() == loc ) {
+                            priorityList.clear();
+                            int alleleCount = 0;
+                            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
+                                HashSet<Allele> alleleSet = new HashSet<Allele>(2);
+                                alleleSet.add(compVC.getReference());
+                                alleleSet.add(compAltAllele);
+                                priorityList.add("Allele" + alleleCount);
+                                eventsAtThisLoc.add(new VariantContextBuilder(compVC).alleles(alleleSet).source("Allele"+alleleCount).make());
+                                alleleCount++;
+                            }
+                        }
+                    }
+                }
+
+                if( eventsAtThisLoc.isEmpty() ) { continue; }
+
+                // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
+                final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
+
+                // Merge the event to find a common reference representation
+                final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
+                if( mergedVC == null ) { continue; }
+
+                final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
+                int aCount = 0;
+                for( final Allele a : mergedVC.getAlleles() ) {
+                    alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
+                }
+
+                if( DEBUG ) {
+                    System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
+                    //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
+                }
+
+                // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
+                final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
+                for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
+                    final int numHaplotypes = alleleMapper.size();
+                    final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
+                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
+                    int glIndex = 0;
+                    for( int iii = 0; iii < numHaplotypes; iii++ ) {
+                        for( int jjj = 0; jjj <= iii; jjj++ ) {
+                            genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
+                        }
+                    }
+                    genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
+                }
+                final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
+
+                if( call != null ) {
+                    returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
+                }
+            }
+        }
+        return returnCalls;
+    }
+
+    protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes, final ArrayList<String> priorityList ) {
+        final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
+        final ArrayList<String> stringsToRemove = new ArrayList<String>();
+        for( final Haplotype h : haplotypes ) {
+            for( final VariantContext vc : h.getEventMap().values() ) {
+                if( vc.isSymbolic() ) {
+                    for( final Haplotype h2 : haplotypes ) {
+                        for( final VariantContext vc2 : h2.getEventMap().values() ) {
+                            if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) {
+                                haplotypesToRemove.add(h);
+                                stringsToRemove.add(vc.getSource());
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        haplotypes.removeAll(haplotypesToRemove);
+        priorityList.removeAll(stringsToRemove);
+    }
+
+    protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
+        final int MAX_SIZE_TO_COMBINE = 10;
+        final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
+        if( startPosKeySet.size() <= 1 ) { return; }
+
+        boolean mapWasUpdated = true;
+        while( mapWasUpdated ) {
+            mapWasUpdated = false;
+
+            // loop over the set of start locations and consider pairs that start near each other
+            final Iterator<Integer> iter = startPosKeySet.iterator();
+            int thisStart = iter.next();
+            while( iter.hasNext() ) {
+                final int nextStart = iter.next();
+                if( nextStart - thisStart < MAX_SIZE_TO_COMBINE) {
+                    boolean isBiallelic = true;
+                    VariantContext thisVC = null;
+                    VariantContext nextVC = null;
+                    int x11 = 0;
+                    int x12 = 0;
+                    int x21 = 0;
+                    int x22 = 0;
+
+                    for( final Haplotype h : haplotypes ) {
+                        // only make complex substitutions out of consecutive biallelic sites
+                        final VariantContext thisHapVC = h.getEventMap().get(thisStart);
+                        if( thisHapVC != null && !thisHapVC.isSymbolic() ) { // something was found at this location on this haplotype
+                            if( thisVC == null ) {
+                                thisVC = thisHapVC;
+                            } else if( !thisHapVC.hasSameAllelesAs( thisVC ) ) {
+                                isBiallelic = false;
+                                break;
+                            }
+                        }
+                        final VariantContext nextHapVC = h.getEventMap().get(nextStart);
+                        if( nextHapVC != null && !nextHapVC.isSymbolic() ) { // something was found at the next location on this haplotype
+                            if( nextVC == null ) {
+                                nextVC = nextHapVC;
+                            } else if( !nextHapVC.hasSameAllelesAs( nextVC ) ) {
+                                isBiallelic = false;
+                                break;
+                            }
+                        }
+                        // count up the co-occurrences of the events for the R^2 calculation
+                        // BUGBUG: use haplotype likelihoods per-sample to make this more accurate
+                        if( thisHapVC == null ) {
+                            if( nextHapVC == null ) { x11++; }
+                            else { x12++; }
+                        } else {
+                            if( nextHapVC == null ) { x21++; }
+                            else { x22++; }
+                        }
+                    }
+                    if( thisVC == null || nextVC == null ) {
+                        continue;
+                        //throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
+                    }
+                    if( isBiallelic ) {
+                        final double R2 = calculateR2LD( x11, x12, x21, x22 );
+                        if( DEBUG ) {
+                            System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
+                            System.out.println("-- " + thisVC);
+                            System.out.println("-- " + nextVC);
+                        }
+                        if( R2 > MERGE_EVENTS_R2_THRESHOLD ) {
+
+                            byte[] refBases = thisVC.getReference().getBases();
+                            byte[] altBases = thisVC.getAlternateAllele(0).getBases();
+                            final int thisPadding = ( thisVC.getReference().isNull() || thisVC.getAlternateAllele(0).isNull() ? 1 : 0 );
+                            final int nextPadding = ( nextVC.getReference().isNull() || nextVC.getAlternateAllele(0).isNull() ? 1 : 0 );
+                            for( int locus = thisStart + refBases.length + thisPadding ; locus < nextStart + nextPadding; locus++ ) {
+                                final byte refByte = ref[ locus - refLoc.getStart() ];
+                                refBases = ArrayUtils.add(refBases, refByte);
+                                altBases = ArrayUtils.add(altBases, refByte);
+                            }
+                            refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
+                            altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
+
+                            final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
+                            mergedAlleles.add( Allele.create( refBases, true ) );
+                            mergedAlleles.add( Allele.create( altBases, false ) );
+                            final VariantContext mergedVC = ( refBases.length == altBases.length ?
+                                    new VariantContextBuilder("MNP", thisVC.getChr(), thisVC.getStart() + (thisVC.isIndel() && !thisVC.isComplexIndel() ? 1 : 0) , nextVC.getEnd(), mergedAlleles).make() :
+                                    new VariantContextBuilder("Complex", thisVC.getChr(), thisVC.getStart(), nextVC.getEnd(), mergedAlleles).referenceBaseForIndel(ref[thisStart-refLoc.getStart()]).make() );
+
+                            // remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event
+                            for( final Haplotype h : haplotypes ) {
+                                final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
+                                if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) {
+                                    eventMap.remove(thisStart);
+                                    eventMap.remove(nextStart);
+                                    eventMap.put(mergedVC.getStart(), mergedVC);
+                                }
+                            }
+                            startPosKeySet.add(mergedVC.getStart());
+                            boolean containsStart = false;
+                            boolean containsNext = false;
+                            for( final Haplotype h : haplotypes ) {
+                                final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
+                                if( eventMap.containsKey(thisStart) ) { containsStart = true; }
+                                if( eventMap.containsKey(nextStart) ) { containsNext = true; }
+                            }
+                            if(!containsStart) { startPosKeySet.remove(thisStart); }
+                            if(!containsNext) { startPosKeySet.remove(nextStart); }
+
+                            if( DEBUG ) { System.out.println("====> " + mergedVC); }
+                            mapWasUpdated = true;
+                            break; // break out of tree set iteration since it was just updated, start over from the beginning and keep merging events
+                        }
+                    }
+                }
+                thisStart = nextStart;
+            }
+        }
+    }
+
+    @Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
+    @Ensures({"result >= 0.0", "result <= 1.0"})
+    protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
+        final int total = x11 + x12 + x21 + x22;
+        final double pa1b1 = ((double) x11) / ((double) total);
+        final double pa1b2 = ((double) x12) / ((double) total);
+        final double pa2b1 = ((double) x21) / ((double) total);
+        final double pa1 = pa1b1 + pa1b2;
+        final double pb1 = pa1b1 + pa2b1;
+        return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
+    }
+
+    @Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
+    @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
+    protected static ArrayList<ArrayList<Haplotype>> createAlleleMapper( final int loc, final ArrayList<VariantContext> eventsAtThisLoc, final ArrayList<Haplotype> haplotypes ) {
+        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
+        final ArrayList<Haplotype> refList = new ArrayList<Haplotype>();
+        for( final Haplotype h : haplotypes ) {
+            if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
+                refList.add(h);
+            } else {
+                boolean foundInEventList = false;
+                for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
+                    if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
+                        foundInEventList = true;
+                    }
+                }
+                if( !foundInEventList ) { // event at this location isn't one of the genotype-able options (during GGA) so this is a reference-supporting haplotype
+                    refList.add(h);
+                }
+            }
+        }
+        alleleMapper.add(refList);
+        for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
+            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
+            for( final Haplotype h : haplotypes ) {
+                if( h.getEventMap().get(loc) != null && h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
+                    list.add(h);
+                }
+            }
+            alleleMapper.add(list);
+        }
+        return alleleMapper;
+    }
+
+    @Ensures({"result.size() == haplotypeAllelesForSample.size()"})
+    protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
+        if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
+        final ArrayList<Allele> eventAllelesForSample = new ArrayList<Allele>();
+        for( final Allele a : haplotypeAllelesForSample ) {
+            final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
+            for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
+                final ArrayList<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
+                if( mappedHaplotypes.contains(haplotype) ) {
+                    eventAllelesForSample.add(eventAlleles.get(iii));
+                    break;
+                }
+            }
+        }
+        return eventAllelesForSample;
+    }
+
+    protected static boolean containsVCWithMatchingAlleles( final List<VariantContext> list, final VariantContext vcToTest ) {
+        for( final VariantContext vc : list ) {
+            if( vc.hasSameAllelesAs(vcToTest) ) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
+        return generateVCsFromAlignment(null, alignmentStartHapwrtRef, cigar, ref, alignment, refLoc, sourceNameToAdd, MNP_LOOK_AHEAD); // BUGBUG: needed for compatibility with HaplotypeResolver code
+    }
+
+    protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd, final int MNP_LOOK_AHEAD ) {
+        final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
+
+        int refPos = alignmentStartHapwrtRef;
+        if( refPos < 0 ) { return null; } // Protection against SW failures
+        int alignmentPos = 0;
+
+        for( final CigarElement ce : cigar.getCigarElements() ) {
+            final int elementLength = ce.getLength();
+            switch( ce.getOperator() ) {
+                case I:
+                    final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength );
+                    boolean allN = true;
+                    for( final byte b : insertionBases ) {
+                        if( b != (byte) 'N' ) {
+                            allN = false;
+                            break;
+                        }
+                    }
+                    if( !allN ) {
+                        final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
+                        final int insertionStart = refLoc.getStart() + refPos - 1;
+                        if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
+                            insertionAlleles.add( Allele.create(ref[refPos-1], true) );
+                            insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
+                            vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
+                        } else {
+                            insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) );
+                            insertionAlleles.add( Allele.create(insertionBases, false) );
+                            vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
+                        }
+
+                    }
+                    alignmentPos += elementLength;
+                    break;
+                case S:
+                    alignmentPos += elementLength;
+                    break;
+                case D:
+                    final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength );
+                    final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
+                    final int deletionStart = refLoc.getStart() + refPos - 1;
+                    // BUGBUG: how often does this symbolic deletion allele case happen?
+                    //if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength)
+                    //        || (haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) ) ) {
+                    //    deletionAlleles.add( Allele.create(ref[refPos-1], true) );
+                    //    deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
+                    //    vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make());
+                    //} else {
+                        deletionAlleles.add( Allele.create(deletionBases, true) );
+                        deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) );
+                        vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make());
+                    //}
+                    refPos += elementLength;
+                    break;
+                case M:
+                    int numSinceMismatch = -1;
+                    int stopOfMismatch = -1;
+                    int startOfMismatch = -1;
+                    int refPosStartOfMismatch = -1;
+                    for( int iii = 0; iii < elementLength; iii++ ) {
+                        if( ref[refPos] != alignment[alignmentPos] && alignment[alignmentPos] != ((byte) 'N') ) {
+                            // SNP or start of possible MNP
+                            if( stopOfMismatch == -1 ) {
+                                startOfMismatch = alignmentPos;
+                                stopOfMismatch = alignmentPos;
+                                numSinceMismatch = 0;
+                                refPosStartOfMismatch = refPos;
+                            } else {
+                                stopOfMismatch = alignmentPos;
+                            }
+                        }
+                        if( stopOfMismatch != -1) {
+                            numSinceMismatch++;
+                        }
+                        if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) {
+                            final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 );
+                            final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 );
+                            final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
+                            snpAlleles.add( Allele.create( refBases, true ) );
+                            snpAlleles.add( Allele.create( mismatchBases, false ) );
+                            final int snpStart = refLoc.getStart() + refPosStartOfMismatch;
+                            vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make());
+                            numSinceMismatch = -1;
+                            stopOfMismatch = -1;
+                            startOfMismatch = -1;
+                            refPosStartOfMismatch = -1;
+                        }
+                        refPos++;
+                        alignmentPos++;
+                    }
+                    break;
+                case N:
+                case H:
+                case P:
+                default:
+                    throw new ReviewedStingException( "Unsupported cigar operator created during SW alignment: " + ce.getOperator() );
+            }
+        }
+        return vcs;
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -0,0 +1,558 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import com.google.java.contract.Ensures;
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.filters.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
+import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.fragments.FragmentCollection;
+import org.broadinstitute.sting.utils.fragments.FragmentUtils;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+import org.broadinstitute.sting.utils.variantcontext.*;
+
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.util.*;
+
+/**
+ * Call SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. Haplotypes are evaluated using an affine gap penalty Pair HMM.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * Input bam file(s) from which to make calls
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * VCF file with raw, unrecalibrated SNP and indel calls.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ *   java
+ *     -jar GenomeAnalysisTK.jar
+ *     -T HaplotypeCaller
+ *     -R reference/human_g1k_v37.fasta
+ *     -I input.bam
+ *     -o output.raw.snps.indels.vcf
+ * </pre>
+ *
+ * @author rpoplin
+ * @since 8/22/11
+ */
+
+@PartitionBy(PartitionType.LOCUS)
+@ActiveRegionExtension(extension=65, maxRegion=275)
+public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatibleWalker {
+
+    /**
+     * A raw, unfiltered, highly specific callset in VCF format.
+     */
+    @Output(doc="File to which variants should be written", required = true)
+    protected VariantContextWriter vcfWriter = null;
+
+    @Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false)
+    protected PrintStream graphWriter = null;
+
+    @Argument(fullName = "assembler", shortName = "assembler", doc = "Assembler to use; currently only SIMPLE_DE_BRUIJN is available.", required = false)
+    protected LocalAssemblyEngine.ASSEMBLER ASSEMBLER_TO_USE = LocalAssemblyEngine.ASSEMBLER.SIMPLE_DE_BRUIJN;
+
+    @Argument(fullName="keepRG", shortName="keepRG", doc="keepRG", required = false)
+    protected String keepRG = null;
+    
+    @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
+    protected int MNP_LOOK_AHEAD = 0;
+
+    @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = true)
+    protected int MIN_PRUNE_FACTOR = 0;
+
+    @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
+    protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
+
+    @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
+    protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
+
+    @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="gcpHMM", required = false)
+    protected int gcpHMM = 10;
+
+    @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage per sample to downsample each region to", required = false)
+    protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
+
+    @Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
+    protected boolean USE_EXPANDED_TRIGGER_SET = false;
+
+    @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
+    protected boolean USE_ALLELES_TRIGGER = false;
+
+    /**
+     * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
+     * dbSNP is not used in any way for the calculations themselves.
+     */
+    @ArgumentCollection
+    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
+    public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
+
+    /**
+     * If a call overlaps with a record from the provided comp track, the INFO field will be annotated
+     *  as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
+     *  Records that are filtered in the comp track will be ignored.
+     *  Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
+     */
+    @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
+    public List<RodBinding<VariantContext>> comps = Collections.emptyList();
+    public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
+
+    // The following are not used by the Unified Genotyper
+    public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
+    public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
+    public boolean alwaysAppendDbsnpId() { return false; }
+
+    /**
+     * Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
+     */
+    @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
+    protected List<String> annotationsToUse = new ArrayList<String>(Arrays.asList(new String[]{"ClippingRankSumTest"}));
+
+    /**
+     * Which annotations to exclude from output in the VCF file.  Note that this argument has higher priority than the -A or -G arguments,
+     * so annotations will be excluded even if they are explicitly included with the other options.
+     */
+    @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
+    protected List<String> annotationsToExclude = new ArrayList<String>(Arrays.asList(new String[]{"HaplotypeScore", "MappingQualityZero", "SpanningDeletions", "TandemRepeatAnnotator"}));
+
+    /**
+     * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
+     */
+    @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
+    protected String[] annotationClassesToUse = { "Standard" };
+
+    @ArgumentCollection
+    private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
+
+    // the calculation arguments
+    private UnifiedGenotyperEngine UG_engine = null;
+    private UnifiedGenotyperEngine UG_engine_simple_genotyper = null;
+    
+    @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
+    protected boolean DEBUG;
+
+    @Argument(fullName="doBanded", shortName="doBanded", doc="If specified, use the banded option", required = false)
+    protected boolean doBanded = false;
+
+    // the assembly engine
+    LocalAssemblyEngine assemblyEngine = null;
+
+    // the likelihoods engine
+    LikelihoodCalculationEngine likelihoodCalculationEngine = null;
+
+    // the genotyping engine
+    GenotypingEngine genotypingEngine = null;
+
+    // the annotation engine
+    private VariantAnnotatorEngine annotationEngine;
+
+    // fasta reference reader to supplement the edges of the reference sequence
+    private IndexedFastaSequenceFile referenceReader;
+
+    // reference base padding size
+    private static final int REFERENCE_PADDING = 900;
+
+    // bases with quality less than or equal to this value are trimmed off the tails of the reads
+    private static final byte MIN_TAIL_QUALITY = 20;
+
+    private ArrayList<String> samplesList = new ArrayList<String>();
+    private final static double LOG_ONE_HALF = -Math.log10(2.0);
+    private final static double LOG_ONE_THIRD = -Math.log10(3.0);
+    private final ArrayList<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
+
+    private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
+    private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // initialize
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    public void initialize() {
+        super.initialize();
+
+        // get all of the unique sample names
+        Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
+        samplesList.addAll( samples );
+        // initialize the UnifiedGenotyper Engine which is used to call into the exact model
+        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
+        UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
+        UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
+        UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
+        UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling
+        UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
+
+        // initialize the output VCF header
+        annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
+
+        Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
+
+        // all annotation fields from VariantAnnotatorEngine
+        headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
+        // all callers need to add these standard annotation header lines
+        VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
+                VCFConstants.DOWNSAMPLED_KEY,
+                VCFConstants.MLE_ALLELE_COUNT_KEY,
+                VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
+        // all callers need to add these standard FORMAT field header lines
+        VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
+                VCFConstants.GENOTYPE_KEY,
+                VCFConstants.GENOTYPE_QUALITY_KEY,
+                VCFConstants.DEPTH_KEY,
+                VCFConstants.GENOTYPE_PL_KEY);
+        // header lines for the experimental HaplotypeCaller-specific annotations
+        headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant"));
+        headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region"));
+        headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region"));
+        headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region"));
+        headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles"));
+        headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL"));
+        headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX"));
+        headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant"));
+
+        vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
+
+        try {
+            // fasta reference reader to supplement the edges of the reference sequence
+            referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
+        } catch( FileNotFoundException e ) {
+            throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
+        }
+
+        assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
+        likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, doBanded );
+        genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // isActive
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    // enable deletions in the pileup
+    @Override
+    public boolean includeReadsWithDeletionAtLoci() { return true; }
+
+    // enable non primary reads in the active region
+    @Override
+    public boolean wantsNonPrimaryReads() { return true; }
+
+    @Override
+    @Ensures({"result >= 0.0", "result <= 1.0"})
+    public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
+
+        if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
+            for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
+                if( !allelesToGenotype.contains(vc) ) {
+                    allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a ReadMetaDataTracker object
+                }
+            }
+            if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
+                return 1.0;
+            }
+        }
+
+        if( USE_ALLELES_TRIGGER ) {
+            return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
+        }
+
+        if( context == null ) { return 0.0; }
+
+        final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
+        noCall.add(Allele.NO_CALL);
+
+        final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
+        final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
+        for( final String sample : splitContexts.keySet() ) {
+            final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
+            Arrays.fill(genotypeLikelihoods, 0.0);
+
+            for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) {
+                final byte qual = ( USE_EXPANDED_TRIGGER_SET ?
+                                        ( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() )
+                                        : p.getQual() );
+                if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) {
+                    int AA = 0; final int AB = 1; int BB = 2;
+                    if( USE_EXPANDED_TRIGGER_SET ) {
+                        if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ||
+                                (!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) {
+                            AA = 2;
+                            BB = 0;
+                        }
+                    } else {
+                        if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
+                            AA = 2;
+                            BB = 0;
+                        }
+                    }
+                    genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
+                    genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF );
+                    genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD;
+                }
+            }
+            genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
+        }
+
+        final ArrayList<Allele> alleles = new ArrayList<Allele>();
+        alleles.add( FAKE_REF_ALLELE );
+        alleles.add( FAKE_ALT_ALLELE );
+        final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
+        return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // map
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    @Override
+    public Integer map( final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker ) {
+
+        final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
+
+        if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
+            for( final VariantContext vc : allelesToGenotype ) {
+                if( activeRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) {
+                    activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode
+                }
+            }
+            allelesToGenotype.removeAll( activeAllelesToGenotype );
+        }
+
+        if( !activeRegion.isActive ) { return 0; } // Not active so nothing to do!
+        if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do!
+        if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
+
+        finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails
+        final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader)); // Create the reference haplotype which is the bases from the reference that make up the active region
+        referenceHaplotype.setIsReference(true);
+        final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
+        //int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion ));
+        final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, getPaddedLoc(activeRegion), MIN_PRUNE_FACTOR, activeAllelesToGenotype );
+        if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
+
+        activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
+        final List<GATKSAMRecord> filteredReads = filterNonPassingReads( activeRegion ); // filter out reads from genotyping which fail mapping quality based criteria
+        if( activeRegion.size() == 0 ) { return 1; } // no reads remain after filtering so nothing else to do!
+
+        // evaluate each sample's reads against all haplotypes
+        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList = splitReadsBySample( activeRegion.getReads() );
+        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
+        likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
+
+        // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
+        final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
+
+        for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
+                ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
+                  ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() )
+                  : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
+            if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
+
+            final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
+            final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
+
+            // add some custom annotations to the calls
+            final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
+            // Calculate the number of variants on the haplotype
+            int maxNumVar = 0;
+            for( final Allele allele : callResult.getFirst().getAlleles() ) {
+                if( !allele.isReference() ) {
+                    for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
+                        final int numVar = haplotype.getEventMap().size();
+                        if( numVar > maxNumVar ) { maxNumVar = numVar; }
+                    }
+                }
+            }
+            // Calculate the event length
+            int maxLength = 0;
+            for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
+                final int length = a.length() - annotatedCall.getReference().length();
+                if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
+            }
+
+            myAttributes.put("NVH", maxNumVar);
+            myAttributes.put("NumHapEval", bestHaplotypes.size());
+            myAttributes.put("NumHapAssembly", haplotypes.size());
+            myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
+            myAttributes.put("EVENTLENGTH", maxLength);
+            myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
+            myAttributes.put("extType", annotatedCall.getType().toString() );
+
+            //if( likelihoodCalculationEngine.haplotypeScore != null ) {
+            //    myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
+            //}
+            if( annotatedCall.hasAttribute("QD") ) {
+                myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
+            }
+
+            vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
+        }
+
+        if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); }
+
+        return 1; // One active region was processed during this map call
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // reduce
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    @Override
+    public Integer reduceInit() {
+        return 0;
+    }
+
+    @Override
+    public Integer reduce(Integer cur, Integer sum) {
+        return cur + sum;
+    }
+
+    @Override
+    public void onTraversalDone(Integer result) {
+        logger.info("Ran local assembly on " + result + " active regions");
+    }
+
+    //---------------------------------------------------------------------------------------------------------------
+    //
+    // private helper functions
+    //
+    //---------------------------------------------------------------------------------------------------------------
+
+    private void finalizeActiveRegion( final ActiveRegion activeRegion ) {
+        if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
+        final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
+        final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
+        activeRegion.clearReads();
+
+        // Join overlapping paired reads to create a single longer read
+        finalizedReadList.addAll( fragmentCollection.getSingletonReads() );
+        for( final List<GATKSAMRecord> overlappingPair : fragmentCollection.getOverlappingPairs() ) {
+            finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) );
+        }
+
+        Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator());
+
+        // Loop through the reads hard clipping the adaptor and low quality tails
+        for( final GATKSAMRecord myRead : finalizedReadList ) {
+            final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
+            if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
+                final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
+                // protect against INTERVALS with abnormally high coverage
+                if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
+                    activeRegion.add(clippedRead);
+                }
+            }
+        }
+    }
+
+    private List<GATKSAMRecord> filterNonPassingReads( final ActiveRegion activeRegion ) {
+        final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
+        for( final GATKSAMRecord rec : activeRegion.getReads() ) {
+            if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
+                readsToRemove.add(rec);
+            }
+        }
+        activeRegion.removeAll( readsToRemove );
+        return readsToRemove;
+    }
+
+    private GenomeLoc getPaddedLoc( final ActiveRegion activeRegion ) {
+        final int padLeft = Math.max(activeRegion.getReferenceLoc().getStart()-REFERENCE_PADDING, 1);
+        final int padRight = Math.min(activeRegion.getReferenceLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getReferenceLoc().getContig()).getSequenceLength());
+        return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReferenceLoc().getContig(), padLeft, padRight);
+    }
+
+    private HashMap<String, ArrayList<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
+        final HashMap<String, ArrayList<GATKSAMRecord>> returnMap = new HashMap<String, ArrayList<GATKSAMRecord>>();
+        for( final String sample : samplesList) {
+            ArrayList<GATKSAMRecord> readList = returnMap.get( sample );
+            if( readList == null ) {
+                readList = new ArrayList<GATKSAMRecord>();
+                returnMap.put(sample, readList);
+            }
+        }
+        for( final GATKSAMRecord read : reads ) {
+            returnMap.get(read.getReadGroup().getSample()).add(read);
+        }
+
+        return returnMap;
+    }
+
+    /*
+    private int determinePruneFactorFromCoverage( final ActiveRegion activeRegion ) {
+        final ArrayList<Integer> readLengthDistribution = new ArrayList<Integer>();
+        for( final GATKSAMRecord read : activeRegion.getReads() ) {
+            readLengthDistribution.add(read.getReadLength());
+        }
+        final double meanReadLength = MathUtils.average(readLengthDistribution);
+        final double meanCoveragePerSample = (double) activeRegion.getReads().size() / ((double) activeRegion.getExtendedLoc().size() / meanReadLength) / (double) samplesList.size();
+        int PRUNE_FACTOR = 0;
+        if( meanCoveragePerSample > 8.5 ) {
+            PRUNE_FACTOR = (int) Math.floor( Math.sqrt( meanCoveragePerSample - 5.0 ) );
+        } else if( meanCoveragePerSample > 3.0 ) {
+            PRUNE_FACTOR = 1;
+        }
+
+        if( DEBUG ) { System.out.println(String.format("Mean coverage per sample = %.1f --> prune factor = %d", meanCoveragePerSample, PRUNE_FACTOR)); }
+        return PRUNE_FACTOR;
+    }
+    */
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java
@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.Reference;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.Window;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.SWPairwiseAlignment;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
+
+import java.util.*;
+
+/**
+ * Haplotype-based resolution of variants in 2 different eval files.
+ *
+ * <p>
+ * HaplotypeResolver is a tool that takes 2 VCF files and constructs haplotypes based on the variants inside them.
+ * From that, it can resolve potential differences in variant calls that are inherently the same (or similar) variants.
+ * Records are annotated with the set and status attributes.
+ *
+ * <h2>Input</h2>
+ * <p>
+ * 2 variant files to resolve.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A single consensus VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx1g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T HaplotypeResolver \
+ *   -V:v1 input1.vcf \
+ *   -V:v2 input2.vcf \
+ *   -o output.vcf
+ * </pre>
+ *
+ */
+@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
+public class HaplotypeResolver extends RodWalker<Integer, Integer> {
+
+    protected static final String INTERSECTION_SET = "intersection";
+    protected static final String SAME_STATUS = "same";
+    protected static final String SOME_ALLELES_MATCH_STATUS = "someAllelesMatch";
+    protected static final String SAME_START_DIFFERENT_ALLELES_STATUS = "sameStartDifferentAlleles";
+    protected static final String SAME_BY_HAPLOTYPE_STATUS = "sameByHaplotype";
+    protected static final String ONE_ALLELE_SUBSET_OF_OTHER_STATUS = "OneAlleleSubsetOfOther";
+    protected static final String OVERLAPPING_EVENTS_STATUS = "overlappingEvents";
+
+    protected final static int MAX_DISTANCE_BETWEEN_MERGED_RECORDS = 50;
+    protected final static int MAX_HAPLOTYPE_TO_CONSIDER = 1000;
+    protected final static int MAX_VARIANT_SIZE_TO_CONSIDER = 100;
+    protected final static int ACTIVE_WINDOW = MAX_HAPLOTYPE_TO_CONSIDER + MAX_VARIANT_SIZE_TO_CONSIDER;
+
+    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
+    public List<RodBinding<VariantContext>> variants;
+
+    @Output(doc="File to which variants should be written", required=true)
+    protected VariantContextWriter baseWriter = null;
+    private VariantContextWriter writer;
+
+    /**
+     * Set to 'null' if you don't want the set field emitted.
+     */
+    @Argument(fullName="setKey", shortName="setKey", doc="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from", required=false)
+    protected String SET_KEY = "set";
+
+    /**
+     * Set to 'null' if you don't want the status field emitted.
+     */
+    @Argument(fullName="statusKey", shortName="statusKey", doc="Key used in the INFO key=value tag emitted describing the extent to which records match", required=false)
+    protected String STATUS_KEY = "status";
+
+    private final LinkedList<VCcontext> queue = new LinkedList<VCcontext>();
+    private String source1, source2;
+    private final List<VariantContext> sourceVCs1 = new ArrayList<VariantContext>();
+    private final List<VariantContext> sourceVCs2 = new ArrayList<VariantContext>();
+
+
+    private class VCcontext {
+        public final Collection<VariantContext> vcs;
+        public final GenomeLoc loc;
+        public final ReferenceContext ref;
+
+        public VCcontext(final Collection<VariantContext> vcs, final ReferenceContext ref) {
+            this.vcs = vcs;
+            this.loc = getToolkit().getGenomeLocParser().createGenomeLoc(vcs.iterator().next());
+            this.ref = ref;
+        }
+    }
+
+    public void initialize() {
+
+        if ( variants.size() != 2 ) {
+            throw new UserException.BadArgumentValue("variant", "this tool requires exactly 2 input variant files");
+        }
+        source1 = variants.get(0).getName();
+        source2 = variants.get(1).getName();
+
+        if ( SET_KEY.toLowerCase().equals("null") )
+            SET_KEY = null;
+        if ( STATUS_KEY.toLowerCase().equals("null") )
+            STATUS_KEY = null;
+
+        // for now, INFO and FORMAT fields are not propagated to the output VCF (so they aren't put into the header)
+        Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
+        if ( SET_KEY != null )
+            headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record"));
+        if ( STATUS_KEY != null )
+            headerLines.add(new VCFInfoHeaderLine(STATUS_KEY, 1, VCFHeaderLineType.String, "Extent to which records match"));
+        final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.<String>emptySet());
+        baseWriter.writeHeader(vcfHeader);
+        writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, ACTIVE_WINDOW);
+    }
+
+    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( tracker == null )
+            return 0;
+
+        final Collection<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
+        if ( VCs.size() == 0 )
+            return 0;
+
+        final VCcontext vc = new VCcontext(VariantContextUtils.sitesOnlyVariantContexts(VCs), ref);
+
+        // TODO -- what should we do about filtered records?
+
+        if ( !queue.isEmpty() ) {
+
+            final VCcontext previous = queue.getLast();
+            if ( !previous.loc.onSameContig(vc.loc) ||
+                    previous.loc.distance(vc.loc) > MAX_DISTANCE_BETWEEN_MERGED_RECORDS ||
+                    queue.getFirst().loc.distance(vc.loc) > MAX_HAPLOTYPE_TO_CONSIDER ) {
+                purgeQueue();
+            }
+        }
+
+        queue.addLast(vc);
+        return 0;
+    }
+
+    public Integer reduceInit() { return 0; }
+
+    public Integer reduce(Integer value, Integer sum) {
+        return sum + value;
+    }
+
+    public void onTraversalDone(Integer result) {
+        if ( !queue.isEmpty() )
+            purgeQueue();
+        writer.close();
+    }
+
+    private void purgeQueue() {
+
+        final ReferenceContext refContext = queue.getFirst().ref;
+
+        // divide them up by source
+        while ( !queue.isEmpty() ) {
+            VCcontext context = queue.removeFirst();
+            for ( final VariantContext vc: context.vcs ) {
+                if ( vc.getSource().equals(source1) )
+                    sourceVCs1.add(vc);
+                else
+                    sourceVCs2.add(vc);
+            }
+        }
+
+        writeAndPurgeAllEqualVariants(sourceVCs1, sourceVCs2, SAME_STATUS);
+
+        if ( sourceVCs1.isEmpty() ) {
+            writeAll(sourceVCs2, source2, null);
+        } else if ( sourceVCs2.isEmpty() ) {
+            writeAll(sourceVCs1, source1, null);
+        } else {
+            resolveByHaplotype(refContext);
+        }
+
+        // allow for GC of the data
+        sourceVCs1.clear();
+        sourceVCs2.clear();
+    }
+
+    private void writeAll(final List<VariantContext> sourceVCs, final String set, final String status) {
+        for ( final VariantContext vc : sourceVCs ) {
+            writeOne(vc, set, status);
+        }
+    }
+
+    private void writeOne(final VariantContext vc, final String set, final String status) {
+        final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
+        if ( SET_KEY != null && set != null )
+            attrs.put(SET_KEY, set);
+        if ( STATUS_KEY != null && status != null )
+            attrs.put(STATUS_KEY, status);
+        writer.add(new VariantContextBuilder(vc).attributes(attrs).make());
+    }
+
+    private void writeAndPurgeAllEqualVariants(final List<VariantContext> sourceVCs1, final List<VariantContext> sourceVCs2, final String status) {
+
+        int currentIndex1 = 0, currentIndex2 = 0;
+        int size1 = sourceVCs1.size(), size2 = sourceVCs2.size();
+        VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
+        VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
+
+        while ( current1 != null && current2 != null ) {
+
+            final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
+            final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
+
+            if ( loc1.equals(loc2) ||
+                    (loc1.getStart() == loc2.getStart() && (current1.getAlternateAlleles().size() > 1 || current2.getAlternateAlleles().size() > 1)) ) {
+                // test the alleles
+                if ( determineAndWriteOverlap(current1, current2, status) ) {
+                    sourceVCs1.remove(currentIndex1);
+                    sourceVCs2.remove(currentIndex2);
+                    size1--;
+                    size2--;
+                } else {
+                    currentIndex1++;
+                    currentIndex2++;
+                }
+                current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
+                current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
+            } else if ( loc1.isBefore(loc2) ) {
+                currentIndex1++;
+                current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1): null);
+            } else {
+                currentIndex2++;
+                current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2): null);
+            }
+        }
+    }
+
+    private boolean determineAndWriteOverlap(final VariantContext vc1, final VariantContext vc2, final String status) {
+        final int allelesFrom1In2 = findOverlap(vc1, vc2);
+        final int allelesFrom2In1 = findOverlap(vc2, vc1);
+        final int totalAllelesIn1 = vc1.getAlternateAlleles().size();
+        final int totalAllelesIn2 = vc2.getAlternateAlleles().size();
+
+        final boolean allAllelesFrom1Overlap = allelesFrom1In2 == totalAllelesIn1;
+        final boolean allAllelesFrom2Overlap = allelesFrom2In1 == totalAllelesIn2;
+
+        boolean thereIsOverlap = true;
+
+        if ( allAllelesFrom1Overlap && allAllelesFrom2Overlap ) {
+            writeOne(vc1, INTERSECTION_SET, status);
+        } else if ( allAllelesFrom1Overlap ) {
+            writeOne(vc2, INTERSECTION_SET, source1 + "IsSubsetOf" + source2);
+        } else if ( allAllelesFrom2Overlap ) {
+            writeOne(vc1, INTERSECTION_SET, source2 + "IsSubsetOf" + source1);
+        } else if ( allelesFrom1In2 > 0 ) {
+            writeOne(vc1, INTERSECTION_SET, SOME_ALLELES_MATCH_STATUS);
+        } else if ( totalAllelesIn1 > 1 || totalAllelesIn2 > 1 ) { // we don't handle multi-allelics in the haplotype-based reconstruction
+            writeOne(vc1, INTERSECTION_SET, SAME_START_DIFFERENT_ALLELES_STATUS);
+        } else {
+            thereIsOverlap = false;
+        }
+
+        return thereIsOverlap;
+    }
+
+    private static int findOverlap(final VariantContext target, final VariantContext comparison) {
+        int overlap = 0;
+        for ( final Allele allele : target.getAlternateAlleles() ) {
+            if ( comparison.hasAlternateAllele(allele) )
+                overlap++;
+        }
+        return overlap;
+    }
+
+    private static final double SW_MATCH = 4.0;
+    private static final double SW_MISMATCH = -10.0;
+    private static final double SW_GAP = -25.0;
+    private static final double SW_GAP_EXTEND = -1.3;
+    private void resolveByHaplotype(final ReferenceContext refContext) {
+
+        final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext);
+        final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext);
+
+        final SWPairwiseAlignment swConsensus1 = new SWPairwiseAlignment( refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+        final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+
+        // protect against SW failures
+        if( swConsensus1.getCigar().toString().contains("S") || swConsensus1.getCigar().getReferenceLength() < 20 ||
+                swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() < 20 ) {
+            // TODO -- handle errors appropriately
+            logger.debug("Bad SW alignment; aborting at " + refContext.getLocus());
+            return;
+        }
+
+        // order results by start position
+        final TreeMap<Integer, VariantContext> source1Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus1.getCigar(), refContext.getBases(), source1Haplotype, refContext.getWindow(), source1, 0));
+        final TreeMap<Integer, VariantContext> source2Map = new TreeMap<Integer, VariantContext>(GenotypingEngine.generateVCsFromAlignment(0, swConsensus2.getCigar(), refContext.getBases(), source2Haplotype, refContext.getWindow(), source2, 0));
+        if ( source1Map.size() == 0 || source2Map.size() == 0 ) {
+            // TODO -- handle errors appropriately
+            logger.debug("No source alleles; aborting at " + refContext.getLocus());
+            return;
+        }
+
+        // create lists and test for equality
+        final List<VariantContext> source1Alleles = new ArrayList<VariantContext>(source1Map.values());
+        final List<VariantContext> source2Alleles = new ArrayList<VariantContext>(source2Map.values());
+
+        writeAndPurgeAllEqualVariants(source1Alleles, source2Alleles, SAME_BY_HAPLOTYPE_STATUS);
+        if ( source1Alleles.isEmpty() ) {
+            writeAll(source2Alleles, source2, null);
+        } else if ( source2Alleles.isEmpty() ) {
+            writeAll(source1Alleles, source1, null);
+        } else {
+            writeDifferences(source1Alleles, source2Alleles);
+        }
+    }
+
+    private byte[] generateHaplotype(final List<VariantContext> sourceVCs, final ReferenceContext refContext) {
+
+        final StringBuilder sb = new StringBuilder();
+
+        final int startPos = refContext.getWindow().getStart();
+        int currentPos = startPos;
+        final byte[] reference = refContext.getBases();
+
+        for ( final VariantContext vc : sourceVCs ) {
+            // add any missing reference context
+            int vcStart = vc.getStart();
+            final int refAlleleLength = vc.getReference().length();
+            if ( refAlleleLength == vc.getEnd() - vc.getStart() ) // this is a deletion (whereas for other events the padding base isn't part of the position)
+                vcStart++;
+
+            while ( currentPos < vcStart )
+                sb.append((char)reference[currentPos++ - startPos]);
+
+            // add the alt allele
+            sb.append(vc.getAlternateAllele(0).getBaseString());
+
+            // skip the reference allele
+            currentPos += refAlleleLength;
+        }
+        // add any missing reference context
+        final int stopPos = refContext.getWindow().getStop();
+        while ( currentPos < stopPos )
+            sb.append((char)reference[currentPos++ - startPos]);
+
+        return sb.toString().getBytes();
+    }
+
+    private void writeDifferences(final List<VariantContext> source1Alleles, final List<VariantContext> source2Alleles) {
+        int currentIndex1 = 0, currentIndex2 = 0;
+        final int size1 = source1Alleles.size(), size2 = source2Alleles.size();
+        VariantContext current1 = source1Alleles.get(0);
+        VariantContext current2 = source2Alleles.get(0);
+
+        while ( currentIndex1 < size1 || currentIndex2 < size2 ) {
+            if ( current1 == null ) {
+                writeOne(current2, source2, null);
+                currentIndex2++;
+                current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
+            } else if ( current2 == null ) {
+                writeOne(current1, source1, null);
+                currentIndex1++;
+                current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
+            } else {
+
+                final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
+                final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
+
+                if ( loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2) ) {
+                    String status;
+                    if ( loc1.getStart() == loc2.getStart() ) {
+                        final String allele1 = current1.getAlternateAllele(0).getBaseString();
+                        final String allele2 = current2.getAlternateAllele(0).getBaseString();
+                        if ( allele1.indexOf(allele2) != -1 || allele2.indexOf(allele1) != -1 )
+                            status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS;
+                        else
+                            status = SAME_START_DIFFERENT_ALLELES_STATUS;
+                    } else {
+                        status = OVERLAPPING_EVENTS_STATUS;
+                    }
+
+                    writeOne(current1, INTERSECTION_SET, status);
+                    currentIndex1++;
+                    currentIndex2++;
+                    current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
+                    current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
+                } else if ( loc1.isBefore(loc2) ) {
+                    writeOne(current1, source1, null);
+                    currentIndex1++;
+                    current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1): null);
+                } else {
+                    writeOne(current2, source2, null);
+                    currentIndex2++;
+                    current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2): null);
+                }
+            }
+        }
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java
@ -0,0 +1,149 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.jgrapht.graph.DefaultDirectedGraph;
+
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks
+ * Date: Mar 23, 2011
+ */
+// Class for finding the K best paths (as determined by the sum of multiplicities of the edges) in a graph.
+// This is different from most graph traversals because we want to test paths from any source node to any sink node.
+public class KBestPaths {
+
+    // static access only
+    protected KBestPaths() { }
+    private static int MAX_PATHS_TO_HOLD = 100;
+
+    protected static class MyInt { public int val = 0; }
+
+    // class to keep track of paths
+    protected static class Path {
+
+        // the last vertex seen in the path
+        private DeBruijnVertex lastVertex;
+
+        // the list of edges comprising the path
+        private ArrayList<DeBruijnEdge> edges;
+
+        // the scores for the path
+        private int totalScore = 0, lowestEdge = -1;
+
+        public Path( final DeBruijnVertex initialVertex ) {
+            lastVertex = initialVertex;
+            edges = new ArrayList<DeBruijnEdge>(0);
+        }
+
+        public Path( final Path p, final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
+            lastVertex = graph.getEdgeTarget(edge);
+            edges = new ArrayList<DeBruijnEdge>(p.edges);
+            edges.add(edge);
+            totalScore = p.totalScore + edge.getMultiplicity();
+            lowestEdge = ( p.lowestEdge == -1 ) ? edge.getMultiplicity() : Math.min(p.lowestEdge, edge.getMultiplicity());
+        }
+
+        public boolean containsEdge( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final DeBruijnEdge edge ) {
+            final DeBruijnVertex targetVertex = graph.getEdgeTarget(edge);
+            for( final DeBruijnEdge e : edges ) {
+                if( e.equals(graph, edge) || graph.getEdgeTarget(e).equals(targetVertex) ) {
+                    return true;
+                }
+            }
+            
+            return false;
+        }
+
+        public ArrayList<DeBruijnEdge> getEdges() { return edges; }
+
+        public int getScore() { return totalScore; }
+
+        public int getLowestEdge() { return lowestEdge; }
+
+        public DeBruijnVertex getLastVertexInPath() { return lastVertex; }
+
+        public byte[] getBases( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
+            if( edges.size() == 0 ) { return lastVertex.getSequence(); }
+            
+            byte[] bases = graph.getEdgeSource( edges.get(0) ).getSequence();
+            for( final DeBruijnEdge e : edges ) {
+                bases = ArrayUtils.addAll(bases, graph.getEdgeTarget( e ).getSuffix());
+            }
+            return bases;
+        }
+    }
+
+    protected static class PathComparatorTotalScore implements Comparator<Path> {
+        public int compare(final Path path1, final Path path2) {
+            return path1.totalScore - path2.totalScore;
+        }
+    }
+
+    protected static class PathComparatorLowestEdge implements Comparator<Path> {
+        public int compare(final Path path1, final Path path2) {
+            return path2.lowestEdge - path1.lowestEdge;
+        }
+    }
+
+    public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
+        if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
+        final ArrayList<Path> bestPaths = new ArrayList<Path>();
+        
+        // run a DFS for best paths
+        for( final DeBruijnVertex v : graph.vertexSet() ) {
+            if( graph.inDegreeOf(v) == 0 ) {
+                findBestPaths(graph, new Path(v), bestPaths);
+            }
+        }
+
+        Collections.sort(bestPaths, new PathComparatorLowestEdge() );
+        Collections.reverse(bestPaths);
+        return bestPaths.subList(0, Math.min(k, bestPaths.size()));
+    }
+
+    private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths ) {
+        findBestPaths(graph, path, bestPaths, new MyInt());
+    }
+
+    private static void findBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path, final List<Path> bestPaths, MyInt n ) {
+
+        // did we hit the end of a path?
+        if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
+            if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
+                // clean out some low scoring paths
+                Collections.sort(bestPaths, new PathComparatorLowestEdge() );
+                for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
+            }
+            bestPaths.add(path);
+        } else if( n.val > 10000) {
+            // do nothing, just return
+        } else {
+            // recursively run DFS
+            final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
+            edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
+            Collections.sort(edgeArrayList);
+            Collections.reverse(edgeArrayList);
+            for ( final DeBruijnEdge edge : edgeArrayList ) {
+                // make sure the edge is not already in the path
+                if ( path.containsEdge(graph, edge) )
+                    continue;
+
+                final Path newPath = new Path(path, graph, edge);
+                n.val++;
+                findBestPaths(graph, newPath, bestPaths, n);
+            }
+        }
+    }
+
+    private static boolean allOutgoingEdgesHaveBeenVisited( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final Path path ) {
+        for( final DeBruijnEdge edge : graph.outgoingEdgesOf(path.lastVertex) ) {
+            if( !path.containsEdge(graph, edge) ) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@ -0,0 +1,403 @@
+/*
+ * Copyright (c) 2011 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.util.*;
+
+public class LikelihoodCalculationEngine {
+
+    private static final double LOG_ONE_HALF = -Math.log10(2.0);
+    private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1;
+    private final byte constantGCP;
+    private final boolean DEBUG;
+    private final PairHMM pairHMM;
+
+    public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final boolean noBanded ) {
+        pairHMM = new PairHMM( noBanded );
+        this.constantGCP = constantGCP;
+        DEBUG = debug;
+    }
+
+    public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
+        final int numHaplotypes = haplotypes.size();
+
+        int X_METRIC_LENGTH = 0;
+        for( final String sample : perSampleReadList.keySet() ) {
+            for( final GATKSAMRecord read : perSampleReadList.get(sample) ) {
+                final int readLength = read.getReadLength();
+                if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
+            }
+        }
+        int Y_METRIC_LENGTH = 0;
+        for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
+            final int haplotypeLength = haplotypes.get(jjj).getBases().length;
+            if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; }
+        }
+
+        // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions and + 1 to consider the final base in a non-global alignment
+        X_METRIC_LENGTH += 2;
+        Y_METRIC_LENGTH += 2;
+
+        // initial arrays to hold the probabilities of being in the match, insertion and deletion cases
+        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+
+        PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
+
+        // for each sample's reads
+        for( final String sample : perSampleReadList.keySet() ) {
+            //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
+            // evaluate the likelihood of the reads given those haplotypes
+            computeReadLikelihoods( haplotypes, perSampleReadList.get(sample), sample, matchMetricArray, XMetricArray, YMetricArray );
+        }
+    }
+
+    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample,
+                                         final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray ) {
+
+        final int numHaplotypes = haplotypes.size();
+        final int numReads = reads.size();
+        final double[][] readLikelihoods = new double[numHaplotypes][numReads];
+        for( int iii = 0; iii < numReads; iii++ ) {
+            final GATKSAMRecord read = reads.get(iii);
+
+            final byte[] overallGCP = new byte[read.getReadLength()];
+            Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
+            Haplotype previousHaplotypeSeen = null;
+            final byte[] readQuals = read.getBaseQualities();
+            final byte[] readInsQuals = read.getBaseInsertionQualities();
+            final byte[] readDelQuals = read.getBaseDeletionQualities();
+            for( int kkk = 0; kkk < readQuals.length; kkk++ ) {
+                readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
+                //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
+                //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
+                readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
+            }
+
+            for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
+                final Haplotype haplotype = haplotypes.get(jjj);
+                final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
+                previousHaplotypeSeen = haplotype;
+
+                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(),
+                        readQuals, readInsQuals, readDelQuals, overallGCP,
+                        haplotypeStart, matchMetricArray, XMetricArray, YMetricArray);
+            }
+        }
+        for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
+            haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] );
+        }
+    }
+
+    private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
+        for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){
+            if( b1[iii] != b2[iii] ) {
+                return iii;
+            }
+        }
+        return b1.length;
+    }
+
+    @Requires({"haplotypes.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
+    public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final String sample ) {
+        // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
+        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
+        for( final Haplotype h : haplotypes ) {
+            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
+            list.add(h);
+            haplotypeMapping.add(list);
+        }
+        return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
+    }
+    
+    @Requires({"haplotypeMapping.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
+    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
+
+        final int numHaplotypes = haplotypeMapping.size();
+        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // compute the diploid haplotype likelihoods
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            for( int jjj = 0; jjj <= iii; jjj++ ) {                
+                for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
+                    final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
+                    for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
+                        final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
+                        double haplotypeLikelihood = 0.0;
+                        for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
+                            // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
+                            // First term is approximated by Jacobian log with table lookup.
+                            haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
+                        }
+                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
+                    }
+                }       
+            }
+        }
+
+        // normalize the diploid likelihoods matrix
+        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );        
+    }
+
+    @Requires({"haplotypes.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
+    public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final Set<String> samples ) {
+        // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
+        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
+        for( final Haplotype h : haplotypes ) {
+            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
+            list.add(h);
+            haplotypeMapping.add(list);
+        }
+
+        final int numHaplotypes = haplotypeMapping.size();
+        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // compute the diploid haplotype likelihoods
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            for( int jjj = 0; jjj <= iii; jjj++ ) {
+                for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
+                    for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
+                        double haplotypeLikelihood = 0.0;
+                        for( final String sample : samples ) {
+                            final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
+                            final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
+                            for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
+                                // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
+                                // First term is approximated by Jacobian log with table lookup.
+                                haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF;
+                            }
+                        }
+                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
+                    }
+                }
+            }
+        }
+
+        // normalize the diploid likelihoods matrix
+        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
+    }
+
+    @Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
+    @Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"})
+    protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) {
+        final int numHaplotypes = likelihoodMatrix.length;
+        double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
+        int index = 0;
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            for( int jjj = 0; jjj <= iii; jjj++ ){
+                genotypeLikelihoods[index++] = likelihoodMatrix[iii][jjj];
+            }
+        }
+        genotypeLikelihoods = MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
+        index = 0;
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            for( int jjj = 0; jjj <= iii; jjj++ ){
+                likelihoodMatrix[iii][jjj] = genotypeLikelihoods[index++];
+            }
+        }
+        return likelihoodMatrix;
+    }
+
+    /*
+    @Requires({"haplotypes.size() > 0"})
+    @Ensures({"result.size() <= haplotypes.size()"})
+    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
+
+        // BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping
+
+        final int numHaplotypes = haplotypes.size();
+        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
+        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
+        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
+        final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes];
+
+        int sampleCount = 0;
+        for( final String sample : sampleKeySet ) {
+            haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample );
+        }
+
+        int hap1 = 0;
+        int hap2 = 0;
+        int chosenSample = 0;
+        //double bestElement = Double.NEGATIVE_INFINITY;
+        final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 );
+        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
+            double maxElement = Double.NEGATIVE_INFINITY;
+            for( int kkk = 0; kkk < sampleCount; kkk++ ) {
+                for( int iii = 0; iii < numHaplotypes; iii++ ) {
+                    for( int jjj = 0; jjj <= iii; jjj++ ) {
+                        if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) {
+                            maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj];
+                            hap1 = iii;
+                            hap2 = jjj;
+                            chosenSample = kkk;
+                        }
+                    }
+                }
+            }
+            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
+
+            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
+            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
+
+            for( int iii = 0; iii < numHaplotypes; iii++ ) {
+                for( int jjj = 0; jjj <= iii; jjj++ ) {
+                    haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY;
+                }
+            }
+        }
+
+        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
+
+        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
+        for( final int hIndex : bestHaplotypesIndexList ) {
+            bestHaplotypes.add( haplotypes.get(hIndex) );
+        }
+        return bestHaplotypes;
+    }
+    */
+
+    @Requires({"haplotypes.size() > 0"})
+    @Ensures({"result.size() <= haplotypes.size()"})
+    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
+
+        final int numHaplotypes = haplotypes.size();
+        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
+        final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
+        bestHaplotypesIndexList.add(0); // always start with the reference haplotype
+        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together
+
+        int hap1 = 0;
+        int hap2 = 0;
+        //double bestElement = Double.NEGATIVE_INFINITY;
+        final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
+        while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
+            double maxElement = Double.NEGATIVE_INFINITY;
+            for( int iii = 0; iii < numHaplotypes; iii++ ) {
+                for( int jjj = 0; jjj <= iii; jjj++ ) {
+                    if( haplotypeLikelihoodMatrix[iii][jjj] > maxElement ) {
+                        maxElement = haplotypeLikelihoodMatrix[iii][jjj];
+                        hap1 = iii;
+                        hap2 = jjj;
+                    }
+                }
+            }
+            if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
+            if( DEBUG ) { System.out.println("Chose haplotypes " + hap1 + " and " + hap2 + " with diploid likelihood = " + haplotypeLikelihoodMatrix[hap1][hap2]); }
+            haplotypeLikelihoodMatrix[hap1][hap2] = Double.NEGATIVE_INFINITY;
+
+            if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
+            if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
+        }
+
+        if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
+
+        final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
+        for( final int hIndex : bestHaplotypesIndexList ) {
+            bestHaplotypes.add( haplotypes.get(hIndex) );
+        }
+        return bestHaplotypes;
+    }
+
+    public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
+        final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
+        final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
+        for( final String sample : perSampleReadList.keySet() ) {
+            final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
+            final ArrayList<GATKSAMRecord> readsForThisSample = perSampleReadList.get(sample);
+            for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
+                final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
+                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
+                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
+                    final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
+                    int count = 0;
+                    for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
+                        double maxLikelihood = Double.NEGATIVE_INFINITY;
+                        for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
+                            final double likelihood = h.getReadLikelihoods(sample)[iii];
+                            if( likelihood > maxLikelihood ) {
+                                maxLikelihood = likelihood;
+                            }
+                        }
+                        likelihoods[count++] = maxLikelihood;
+                    }
+                    final int bestAllele = MathUtils.maxElementIndex(likelihoods);
+                    final double bestLikelihood = likelihoods[bestAllele];
+                    Allele allele = Allele.NO_CALL;
+                    boolean isInformativeRead = false;
+                    for( final double likelihood : likelihoods ) {
+                        if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) {
+                            isInformativeRead = true;
+                            break;
+                        }
+                    }
+                    // uninformative reads get the no call Allele
+                    if( isInformativeRead ) {
+                        allele = call.getFirst().getAlleles().get(bestAllele);
+                    }
+                    List<GATKSAMRecord> readList = alleleReadMap.get(allele);
+                    if( readList == null ) {
+                        readList = new ArrayList<GATKSAMRecord>();
+                        alleleReadMap.put(allele, readList);
+                    }
+                    readList.add(read);
+                }
+            }
+            // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
+            List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
+            if( readList == null ) {
+                readList = new ArrayList<GATKSAMRecord>();
+                alleleReadMap.put(Allele.NO_CALL, readList);
+            }
+            for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) {
+                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
+                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
+                    readList.add(read);
+                }
+            }
+            returnMap.put(sample, alleleReadMap);
+        }
+        return returnMap;
+    }
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
@ -0,0 +1,25 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.Haplotype;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.util.ArrayList;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks
+ * Date: Mar 14, 2011
+ */
+public abstract class LocalAssemblyEngine {
+
+    public enum ASSEMBLER {
+        SIMPLE_DE_BRUIJN
+    }
+
+    protected LocalAssemblyEngine() {
+    }
+
+    public abstract ArrayList<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList<VariantContext> activeAllelesToGenotype);
+}
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@ -0,0 +1,372 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import com.google.java.contract.Ensures;
+import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.Haplotype;
+import org.broadinstitute.sting.utils.SWPairwiseAlignment;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.AlignmentUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.jgrapht.graph.DefaultDirectedGraph;
+
+import java.io.PrintStream;
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks, rpoplin
+ * Date: Mar 14, 2011
+ */
+
+public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
+
+    private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
+    private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
+    private static final byte MIN_QUALITY = (byte) 17;
+
+    // Smith-Waterman parameters originally copied from IndelRealigner
+    private static final double SW_MATCH = 5.0;      // 1.0;
+    private static final double SW_MISMATCH = -10.0;  //-1.0/3.0;
+    private static final double SW_GAP = -22.0;       //-1.0-1.0/3.0;
+    private static final double SW_GAP_EXTEND = -1.2; //-1.0/.0;
+
+    private final boolean DEBUG;
+    private final PrintStream GRAPH_WRITER;
+    private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
+
+    private int PRUNE_FACTOR = 1;
+    
+    public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
+        super();
+        DEBUG = debug;
+        GRAPH_WRITER = graphWriter;
+    }
+
+    public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
+        this.PRUNE_FACTOR = PRUNE_FACTOR;
+
+        // create the graphs
+        createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );
+
+        // clean up the graphs by pruning and merging
+        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
+            pruneGraph( graph, PRUNE_FACTOR );
+            //eliminateNonRefPaths( graph );
+            mergeNodes( graph );
+        }
+
+        if( GRAPH_WRITER != null ) {
+            printGraphs();
+        }
+
+        // find the best paths in the graphs
+        return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
+    }
+
+    private void createDeBruijnGraphs( final ArrayList<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
+        graphs.clear();
+
+        // create the graph
+        for( int kmer = 31; kmer <= 75; kmer += 6 ) {
+            final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+            if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
+                graphs.add(graph);
+            }
+        }
+    }
+
+    protected static void mergeNodes( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
+        boolean foundNodesToMerge = true;
+        while( foundNodesToMerge ) {
+            foundNodesToMerge = false;
+            for( final DeBruijnEdge e : graph.edgeSet() ) {
+                final DeBruijnVertex outgoingVertex = graph.getEdgeTarget(e);
+                final DeBruijnVertex incomingVertex = graph.getEdgeSource(e);
+                if( !outgoingVertex.equals(incomingVertex) && graph.inDegreeOf(outgoingVertex) == 1 && graph.outDegreeOf(incomingVertex) == 1) {
+                    final Set<DeBruijnEdge> outEdges = graph.outgoingEdgesOf(outgoingVertex);
+                    final Set<DeBruijnEdge> inEdges = graph.incomingEdgesOf(incomingVertex);
+                    if( inEdges.size() == 1 && outEdges.size() == 1 ) {
+                        inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
+                        outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
+                    } else if( inEdges.size() == 1 ) {
+                        inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
+                    } else if( outEdges.size() == 1 ) {
+                        outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
+                    }
+
+                    final DeBruijnVertex addedVertex = new DeBruijnVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSuffix()), outgoingVertex.kmer );
+                    graph.addVertex(addedVertex);
+                    for( final DeBruijnEdge edge : outEdges ) {
+                        graph.addEdge(addedVertex, graph.getEdgeTarget(edge), new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
+                    }
+                    for( final DeBruijnEdge edge : inEdges ) {
+                        graph.addEdge(graph.getEdgeSource(edge), addedVertex, new DeBruijnEdge(edge.getIsRef(), edge.getMultiplicity()));
+                    }
+
+                    graph.removeVertex( incomingVertex );
+                    graph.removeVertex( outgoingVertex );
+                    foundNodesToMerge = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
+        final ArrayList<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
+        for( final DeBruijnEdge e : graph.edgeSet() ) {
+            if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
+                edgesToRemove.add(e);
+            }
+        }
+        graph.removeAllEdges(edgesToRemove);
+
+        // Run through the graph and clean up singular orphaned nodes
+        final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
+        for( final DeBruijnVertex v : graph.vertexSet() ) {
+            if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) {
+                verticesToRemove.add(v);
+            }
+        }
+        graph.removeAllVertices(verticesToRemove);
+    }
+
+    protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
+        final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
+        boolean done = false;
+        while( !done ) {
+            done = true;
+            for( final DeBruijnVertex v : graph.vertexSet() ) {
+                if( graph.inDegreeOf(v) == 0 || graph.outDegreeOf(v) == 0 ) {
+                    boolean isRefNode = false;
+                    for( final DeBruijnEdge e : graph.edgesOf(v) ) {
+                        if( e.getIsRef() ) {
+                            isRefNode = true;
+                            break;
+                        }
+                    }
+                    if( !isRefNode ) {
+                        done = false;
+                        verticesToRemove.add(v);
+                    }
+                }
+            }
+            graph.removeAllVertices(verticesToRemove);
+            verticesToRemove.clear();
+        }
+    }
+
+    private static boolean createGraphFromSequences( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final ArrayList<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
+        final byte[] refSequence = refHaplotype.getBases();
+        if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
+            final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
+            for (int i = 0; i < kmersInSequence - 1; i++) {
+                // get the kmers
+                final byte[] kmer1 = new byte[KMER_LENGTH];
+                System.arraycopy(refSequence, i, kmer1, 0, KMER_LENGTH);
+                final byte[] kmer2 = new byte[KMER_LENGTH];
+                System.arraycopy(refSequence, i+1, kmer2, 0, KMER_LENGTH);
+                if( !addKmersToGraph(graph, kmer1, kmer2, true) ) {
+                    if( DEBUG ) {
+                        System.out.println("Cycle detected in reference graph for kmer = " + KMER_LENGTH + " ...skipping");
+                    }
+                    return false;
+                }
+            }
+        }
+
+        for( final GATKSAMRecord read : reads ) {
+            final byte[] sequence = read.getReadBases();
+            final byte[] qualities = read.getBaseQualities();
+            if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) {
+                final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
+                for( int iii = 0; iii < kmersInSequence - 1; iii++ ) {                    
+                    // if the qualities of all the bases in the kmers are high enough
+                    boolean badKmer = false;
+                    for( int jjj = iii; jjj < iii + KMER_LENGTH + 1; jjj++) {
+                        if( qualities[jjj] < MIN_QUALITY ) {
+                            badKmer = true;
+                            break;
+                        }
+                    }
+                    if( !badKmer ) {
+                        // get the kmers
+                        final byte[] kmer1 = new byte[KMER_LENGTH];
+                        System.arraycopy(sequence, iii, kmer1, 0, KMER_LENGTH);
+                        final byte[] kmer2 = new byte[KMER_LENGTH];
+                        System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH);
+
+                        addKmersToGraph(graph, kmer1, kmer2, false);
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    protected static boolean addKmersToGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final byte[] kmer1, final byte[] kmer2, final boolean isRef ) {
+
+        final int numVertexBefore = graph.vertexSet().size();
+        final DeBruijnVertex v1 = new DeBruijnVertex( kmer1, kmer1.length );
+        graph.addVertex(v1);
+        final DeBruijnVertex v2 = new DeBruijnVertex( kmer2, kmer2.length );
+        graph.addVertex(v2);
+        if( isRef && graph.vertexSet().size() == numVertexBefore ) { return false; }
+
+        final DeBruijnEdge targetEdge = graph.getEdge(v1, v2);
+        if ( targetEdge == null ) {
+            graph.addEdge(v1, v2, new DeBruijnEdge( isRef ));
+        } else {
+            if( isRef ) {
+                targetEdge.setIsRef( true );
+            }
+            targetEdge.setMultiplicity(targetEdge.getMultiplicity() + 1);
+        }
+        return true;
+    }
+
+    private void printGraphs() {
+        int count = 0;
+        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
+            GRAPH_WRITER.println("digraph kmer" + count++ +" {");
+            for( final DeBruijnEdge edge : graph.edgeSet() ) {
+                if( edge.getMultiplicity() > PRUNE_FACTOR ) {
+                    GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
+                }
+                if( edge.getIsRef() ) {
+                    GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
+                }
+                if( !edge.getIsRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
+            }
+            for( final DeBruijnVertex v : graph.vertexSet() ) {
+                final String label = ( graph.inDegreeOf(v) == 0 ? v.toString() : v.getSuffixString() );
+                GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + label + "\"]");
+            }
+            GRAPH_WRITER.println("}");
+        }
+    }
+
+    @Ensures({"result.contains(refHaplotype)"})
+    private ArrayList<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
+        final ArrayList<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
+
+        // add the reference haplotype separately from all the others
+        final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+        refHaplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
+        refHaplotype.setCigar( swConsensus.getCigar() );
+        if( !returnHaplotypes.add( refHaplotype ) ) {
+            throw new ReviewedStingException("Unable to add reference haplotype during assembly: " + refHaplotype);
+        }
+
+        final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
+        final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();
+
+        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
+            for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
+                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
+                if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
+                    return returnHaplotypes;
+                    //throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
+                }
+            }
+        }
+
+        for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
+            for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
+                final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
+                if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
+                    if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
+                        final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
+                        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
+                            final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
+                            if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
+                                for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
+                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        if( DEBUG ) { 
+            if( returnHaplotypes.size() > 1 ) {
+                System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes to evaluate every read against.");
+            } else {
+                System.out.println("Found only the reference haplotype in the assembly graph.");
+            }
+            for( final Haplotype h : returnHaplotypes ) {
+                System.out.println( h.toString() );
+                System.out.println( "> Cigar = " + h.getCigar() );
+            }
+        }
+
+        return returnHaplotypes;
+    }
+
+    private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
+        //final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
+        final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+        haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
+        haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
+
+        if( swConsensus.getCigar().toString().contains("S") || swConsensus.getCigar().getReferenceLength() < 60 ) { // protect against SW failures
+            return false;
+        }
+
+        final int hapStart = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStart, ReadUtils.ClippingTail.LEFT_TAIL, true );
+        int hapStop = ReadUtils.getReadCoordinateForReferenceCoordinate( haplotype.getAlignmentStartHapwrtRef(), haplotype.getCigar(), activeRegionStop, ReadUtils.ClippingTail.RIGHT_TAIL, true );
+        if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED && activeRegionStop == haplotype.getAlignmentStartHapwrtRef() + haplotype.getCigar().getReferenceLength() ) {
+            hapStop = activeRegionStop; // contract for getReadCoordinateForReferenceCoordinate function says that if read ends at boundary then it is outside of the clipping goal
+        }
+        byte[] newHaplotypeBases;
+        // extend partial haplotypes to contain the full active region sequence
+        int leftBreakPoint = 0;
+        int rightBreakPoint = 0;
+        if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED && hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
+            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()),
+                                                   haplotype.getBases()),
+                                                   ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
+            leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
+            rightBreakPoint = leftBreakPoint + haplotype.getBases().length;
+            //newHaplotypeBases = haplotype.getBases();
+            //return false; // piece of haplotype isn't anchored within the active region so don't build a haplotype out of it
+        } else if( hapStart == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
+            //return false;
+            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(ref, activeRegionStart, swConsensus.getAlignmentStart2wrt1()), ArrayUtils.subarray(haplotype.getBases(), 0, hapStop) );
+            //newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), 0, hapStop);
+            leftBreakPoint = swConsensus.getAlignmentStart2wrt1() - activeRegionStart;
+        } else if( hapStop == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) {
+            //return false;
+            newHaplotypeBases = ArrayUtils.addAll( ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length), ArrayUtils.subarray(ref, swConsensus.getAlignmentStart2wrt1() + swConsensus.getCigar().getReferenceLength(), activeRegionStop) );
+            //newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, haplotype.getBases().length);
+            rightBreakPoint = haplotype.getBases().length - hapStart;
+        } else {
+            newHaplotypeBases = ArrayUtils.subarray(haplotype.getBases(), hapStart, hapStop);
+        }
+
+        final Haplotype h = new Haplotype( newHaplotypeBases );
+        final SWPairwiseAlignment swConsensus2 = new SWPairwiseAlignment( ref, h.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
+
+        h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
+        h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
+        h.leftBreakPoint = leftBreakPoint;
+        h.rightBreakPoint = rightBreakPoint;
+        if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
+            return false;
+        }
+
+        if( !haplotypeList.contains(h) ) {
+            haplotypeList.add(h);
+            return true;
+        } else {
+            return false;
+        }
+    }
+}
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
@ -0,0 +1,271 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 3/15/12
+ */
+
+import net.sf.picard.reference.ReferenceSequenceFile;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.*;
+
+/**
+ * Unit tests for GenotypingEngine
+ */
+public class GenotypingEngineUnitTest extends BaseTest {
+
+    private static ReferenceSequenceFile seq;
+    private GenomeLocParser genomeLocParser;
+
+    @BeforeClass
+    public void init() throws FileNotFoundException {
+        // sequence
+        seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
+        genomeLocParser = new GenomeLocParser(seq);
+    }
+
+    @Test
+    public void testFindHomVarEventAllelesInSample() {
+        final List<Allele> eventAlleles = new ArrayList<Allele>();
+        eventAlleles.add( Allele.create("A", true) );
+        eventAlleles.add( Allele.create("C", false) );
+        final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
+        haplotypeAlleles.add( Allele.create("AATA", true) );
+        haplotypeAlleles.add( Allele.create("AACA", false) );
+        haplotypeAlleles.add( Allele.create("CATA", false) );
+        haplotypeAlleles.add( Allele.create("CACA", false) );
+        final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
+        haplotypes.add(new Haplotype("AATA".getBytes()));
+        haplotypes.add(new Haplotype("AACA".getBytes()));
+        haplotypes.add(new Haplotype("CATA".getBytes()));
+        haplotypes.add(new Haplotype("CACA".getBytes()));
+        final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
+        haplotypeAllelesForSample.add( Allele.create("CATA", false) );
+        haplotypeAllelesForSample.add( Allele.create("CACA", false) );
+        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
+        ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
+        Aallele.add(haplotypes.get(0));
+        Aallele.add(haplotypes.get(1));
+        ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
+        Callele.add(haplotypes.get(2));
+        Callele.add(haplotypes.get(3));
+        alleleMapper.add(Aallele);
+        alleleMapper.add(Callele);
+        final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
+        eventAllelesForSample.add( Allele.create("C", false) );
+        eventAllelesForSample.add( Allele.create("C", false) );
+
+        if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
+            logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
+            logger.warn("expected alleles = " + eventAllelesForSample);
+        }
+        Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
+    }
+
+    @Test
+    public void testFindHetEventAllelesInSample() {
+        final List<Allele> eventAlleles = new ArrayList<Allele>();
+        eventAlleles.add( Allele.create("A", true) );
+        eventAlleles.add( Allele.create("C", false) );
+        eventAlleles.add( Allele.create("T", false) );
+        final List<Allele> haplotypeAlleles = new ArrayList<Allele>();
+        haplotypeAlleles.add( Allele.create("AATA", true) );
+        haplotypeAlleles.add( Allele.create("AACA", false) );
+        haplotypeAlleles.add( Allele.create("CATA", false) );
+        haplotypeAlleles.add( Allele.create("CACA", false) );
+        haplotypeAlleles.add( Allele.create("TACA", false) );
+        haplotypeAlleles.add( Allele.create("TTCA", false) );
+        haplotypeAlleles.add( Allele.create("TTTA", false) );
+        final ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
+        haplotypes.add(new Haplotype("AATA".getBytes()));
+        haplotypes.add(new Haplotype("AACA".getBytes()));
+        haplotypes.add(new Haplotype("CATA".getBytes()));
+        haplotypes.add(new Haplotype("CACA".getBytes()));
+        haplotypes.add(new Haplotype("TACA".getBytes()));
+        haplotypes.add(new Haplotype("TTCA".getBytes()));
+        haplotypes.add(new Haplotype("TTTA".getBytes()));
+        final List<Allele> haplotypeAllelesForSample = new ArrayList<Allele>();
+        haplotypeAllelesForSample.add( Allele.create("TTTA", false) );
+        haplotypeAllelesForSample.add( Allele.create("AATA", true) );
+        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
+        ArrayList<Haplotype> Aallele = new ArrayList<Haplotype>();
+        Aallele.add(haplotypes.get(0));
+        Aallele.add(haplotypes.get(1));
+        ArrayList<Haplotype> Callele = new ArrayList<Haplotype>();
+        Callele.add(haplotypes.get(2));
+        Callele.add(haplotypes.get(3));
+        ArrayList<Haplotype> Tallele = new ArrayList<Haplotype>();
+        Tallele.add(haplotypes.get(4));
+        Tallele.add(haplotypes.get(5));
+        Tallele.add(haplotypes.get(6));
+        alleleMapper.add(Aallele);
+        alleleMapper.add(Callele);
+        alleleMapper.add(Tallele);
+        final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
+        eventAllelesForSample.add( Allele.create("A", true) );
+        eventAllelesForSample.add( Allele.create("T", false) );
+
+        if(!compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes))) {
+            logger.warn("calc alleles = " + GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes));
+            logger.warn("expected alleles = " + eventAllelesForSample);
+        }
+        Assert.assertTrue(compareAlleleLists(eventAllelesForSample, GenotypingEngine.findEventAllelesInSample(eventAlleles, haplotypeAlleles, haplotypeAllelesForSample, alleleMapper, haplotypes)));
+    }
+
+    private boolean compareAlleleLists(List<Allele> l1, List<Allele> l2) {
+        if( l1.size() != l2.size() ) {
+            return false; // sanity check
+        }
+
+        for( int i=0; i < l1.size(); i++ ){
+            if ( !l2.contains(l1.get(i)) )
+                return false;
+        }
+        return true;
+    }
+
+    
+    private class BasicGenotypingTestProvider extends TestDataProvider {
+        byte[] ref;
+        byte[] hap;
+        HashMap<Integer,Byte> expected;
+        GenotypingEngine ge = new GenotypingEngine(false, 0, false);
+
+        public BasicGenotypingTestProvider(String refString, String hapString, HashMap<Integer, Byte> expected) {
+            super(BasicGenotypingTestProvider.class, String.format("Haplotype to VCF test: ref = %s, alignment = %s", refString,hapString));
+            ref = refString.getBytes();
+            hap = hapString.getBytes();
+            this.expected = expected;
+        }
+        
+        public HashMap<Integer,VariantContext> calcAlignment() {
+            final SWPairwiseAlignment alignment = new SWPairwiseAlignment(ref, hap);
+            return ge.generateVCsFromAlignment( alignment.getAlignmentStart2wrt1(), alignment.getCigar(), ref, hap, genomeLocParser.createGenomeLoc("4",1,1+ref.length), "name", 0);
+        }
+    }
+
+    @DataProvider(name = "BasicGenotypingTestProvider")
+    public Object[][] makeBasicGenotypingTests() {
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(1 + contextSize, (byte)'M');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACTAGCCGATAG", map);
+        }
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(2 + contextSize, (byte)'M');
+            map.put(21 + contextSize, (byte)'M');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG", "ATCTCGCATCGCGAGCATCGCCTAGCCGATAG", map);
+        }
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(1 + contextSize, (byte)'M');
+            map.put(20 + contextSize, (byte)'I');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGACACTAGCCGATAG", map);
+        }
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(1 + contextSize, (byte)'M');
+            map.put(20 + contextSize, (byte)'D');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map);
+        }
+
+        for( int contextSize : new int[]{1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(1, (byte)'M');
+            map.put(20, (byte)'D');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider("AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCGATAG", map);
+        }
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(2 + contextSize, (byte)'M');
+            map.put(20 + contextSize, (byte)'I');
+            map.put(30 + contextSize, (byte)'D');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "ACCTCGCATCGCGAGCATCGTTACTAGCCGATG", map);
+        }
+
+        for( int contextSize : new int[]{0,1,5,9,24,36} ) {
+            HashMap<Integer, Byte> map = new HashMap<Integer, Byte>();
+            map.put(1 + contextSize, (byte)'M');
+            map.put(20 + contextSize, (byte)'D');
+            map.put(28 + contextSize, (byte)'M');
+            final String context = Utils.dupString('G', contextSize);
+            new BasicGenotypingTestProvider(context + "AGCTCGCATCGCGAGCATCGACTAGCCGATAG" + context, "CGCTCGCATCGCGAGCATCGCTAGCCCATAG", map);
+        }
+
+        return BasicGenotypingTestProvider.getTests(BasicGenotypingTestProvider.class);
+    }
+    
+    @Test(dataProvider = "BasicGenotypingTestProvider", enabled = true)
+    public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) {
+        HashMap<Integer,VariantContext> calculatedMap = cfg.calcAlignment();
+        HashMap<Integer,Byte> expectedMap = cfg.expected;
+        logger.warn(String.format("Test: %s", cfg.toString()));
+        if(!compareVCMaps(calculatedMap, expectedMap)) {
+            logger.warn("calc map = " + calculatedMap);
+            logger.warn("expected map = " + expectedMap);
+        }
+        Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap));
+    }
+
+    /**
+     * Tests that we get the right values from the binomial distribution
+     */
+    @Test
+    public void testCalculateR2LD() {
+        logger.warn("Executing testCalculateR2LD");
+
+        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,1,1,1), 0.0, 0.00001);
+        Assert.assertEquals(GenotypingEngine.calculateR2LD(100,100,100,100), 0.0, 0.00001);
+        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,0,0,1), 1.0, 0.00001);
+        Assert.assertEquals(GenotypingEngine.calculateR2LD(100,0,0,100), 1.0, 0.00001);
+        Assert.assertEquals(GenotypingEngine.calculateR2LD(1,2,3,4), (0.1 - 0.12) * (0.1 - 0.12) / (0.3 * 0.7 * 0.4 * 0.6), 0.00001);
+    }
+    
+    /**
+     * Private function to compare HashMap of VCs, it only checks the types and start locations of the VariantContext
+     */
+    private boolean compareVCMaps(HashMap<Integer, VariantContext> calc, HashMap<Integer, Byte> expected) {
+        if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check
+        for( Integer loc : expected.keySet() ) {
+            Byte type = expected.get(loc);
+            switch( type ) {
+                case 'I':
+                    if( !calc.get(loc).isSimpleInsertion() ) { return false; }
+                    break;
+                case 'D':
+                    if( !calc.get(loc).isSimpleDeletion() ) { return false; }
+                    break;
+                case 'M':
+                    if( !(calc.get(loc).isMNP() || calc.get(loc).isSNP()) ) { return false; }
+                    break;
+                default:
+                    return false;
+            }
+        }
+        return true;
+    }
+}
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -0,0 +1,36 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+public class HaplotypeCallerIntegrationTest extends WalkerTest {
+    final static String REF = b37KGReference;
+    final String NA12878_BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
+    final String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
+    final String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
+    //final String RECAL_FILE = validationDataLocation + "NA12878.kmer.8.subset.recal_data.bqsr";
+
+    private void HCTest(String bam, String args, String md5) {
+        final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
+        final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
+        executeTest("testHaplotypeCaller: args=" + args, spec);
+    }
+
+    @Test
+    public void testHaplotypeCallerMultiSample() {
+        HCTest(CEUTRIO_BAM, "", "7b4e76934e0c911220b4e7da8776ab2b");
+    }
+
+    @Test
+    public void testHaplotypeCallerSingleSample() {
+        HCTest(NA12878_BAM, "", "fcf0cea98a571d5e2d1dfa8b5edc599d");
+    }
+
+    @Test
+    public void testHaplotypeCallerMultiSampleGGA() {
+        HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6");
+    }
+}
+
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
@ -0,0 +1,173 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 3/14/12
+ */
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.Haplotype;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * Unit tests for LikelihoodCalculationEngine
+ */
+public class LikelihoodCalculationEngineUnitTest extends BaseTest {
+
+    @Test
+    public void testNormalizeDiploidLikelihoodMatrixFromLog10() {
+        double[][] likelihoodMatrix = {
+            {-90.2,     0,      0},
+            {-190.1, -2.1,      0},
+            {-7.0,  -17.5,  -35.9}
+        };
+        double[][] normalizedMatrix = {
+            {-88.1,     0,      0},
+            {-188.0,  0.0,      0},
+            {-4.9,  -15.4,  -33.8}
+        };
+
+
+        Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix), normalizedMatrix));
+
+        double[][] likelihoodMatrix2 = {
+                {-90.2,     0,      0,        0},
+                {-190.1, -2.1,      0,        0},
+                {-7.0,  -17.5,  -35.9,        0},
+                {-7.0,  -17.5,  -35.9,  -1000.0},
+        };
+        double[][] normalizedMatrix2 = {
+                {-88.1,     0,      0,        0},
+                {-188.0,  0.0,      0,        0},
+                {-4.9,  -15.4,  -33.8,        0},
+                {-4.9,  -15.4,  -33.8,   -997.9},
+        };
+        Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix2), normalizedMatrix2));
+    }
+
+    private class BasicLikelihoodTestProvider extends TestDataProvider {
+        public Double readLikelihoodForHaplotype1;
+        public Double readLikelihoodForHaplotype2;
+        public Double readLikelihoodForHaplotype3;
+        
+        public BasicLikelihoodTestProvider(double a, double b) {
+            super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f",a,b));
+            readLikelihoodForHaplotype1 = a;
+            readLikelihoodForHaplotype2 = b;
+            readLikelihoodForHaplotype3 = null;
+        }
+
+        public BasicLikelihoodTestProvider(double a, double b, double c) {
+            super(BasicLikelihoodTestProvider.class, String.format("Diploid haplotype likelihoods for reads %f / %f / %f",a,b,c));
+            readLikelihoodForHaplotype1 = a;
+            readLikelihoodForHaplotype2 = b;
+            readLikelihoodForHaplotype3 = c;
+        }
+        
+        public double[][] expectedDiploidHaplotypeMatrix() {
+            if( readLikelihoodForHaplotype3 == null ) {
+                double maxValue = Math.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2);
+                double[][] normalizedMatrix = {
+                        {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY},
+                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue}
+                };
+                return normalizedMatrix;
+            } else {
+                double maxValue = MathUtils.max(readLikelihoodForHaplotype1,readLikelihoodForHaplotype2,readLikelihoodForHaplotype3);
+                double[][] normalizedMatrix = {
+                        {readLikelihoodForHaplotype1 - maxValue, Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY},
+                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype2)) - maxValue, readLikelihoodForHaplotype2 - maxValue, Double.NEGATIVE_INFINITY},
+                        {Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype1) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue,
+                         Math.log10(0.5*Math.pow(10,readLikelihoodForHaplotype2) + 0.5*Math.pow(10,readLikelihoodForHaplotype3)) - maxValue, readLikelihoodForHaplotype3 - maxValue}
+                };
+                return normalizedMatrix;
+            }
+        }
+        
+        public double[][] calcDiploidHaplotypeMatrix() {
+            ArrayList<Haplotype> haplotypes = new ArrayList<Haplotype>();
+            for( int iii = 1; iii <= 3; iii++) {
+                Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) );
+                if( readLikelihood != null ) {
+                    Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() );
+                    haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood});
+                    haplotypes.add(haplotype);
+                }
+            }
+            return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, "myTestSample");
+        }
+    }
+
+    @DataProvider(name = "BasicLikelihoodTestProvider")
+    public Object[][] makeBasicLikelihoodTests() {
+        new BasicLikelihoodTestProvider(-1.1, -2.2);
+        new BasicLikelihoodTestProvider(-2.2, -1.1);
+        new BasicLikelihoodTestProvider(-1.1, -1.1);
+        new BasicLikelihoodTestProvider(-9.7, -15.0);
+        new BasicLikelihoodTestProvider(-1.1, -2000.2);
+        new BasicLikelihoodTestProvider(-1000.1, -2.2);
+        new BasicLikelihoodTestProvider(0, 0);
+        new BasicLikelihoodTestProvider(-1.1, 0);
+        new BasicLikelihoodTestProvider(0, -2.2);
+        new BasicLikelihoodTestProvider(-100.1, -200.2);
+
+        new BasicLikelihoodTestProvider(-1.1, -2.2, 0);
+        new BasicLikelihoodTestProvider(-2.2, -1.1, 0);
+        new BasicLikelihoodTestProvider(-1.1, -1.1, 0);
+        new BasicLikelihoodTestProvider(-9.7, -15.0, 0);
+        new BasicLikelihoodTestProvider(-1.1, -2000.2, 0);
+        new BasicLikelihoodTestProvider(-1000.1, -2.2, 0);
+        new BasicLikelihoodTestProvider(0, 0, 0);
+        new BasicLikelihoodTestProvider(-1.1, 0, 0);
+        new BasicLikelihoodTestProvider(0, -2.2, 0);
+        new BasicLikelihoodTestProvider(-100.1, -200.2, 0);
+
+        new BasicLikelihoodTestProvider(-1.1, -2.2, -12.121);
+        new BasicLikelihoodTestProvider(-2.2, -1.1, -12.121);
+        new BasicLikelihoodTestProvider(-1.1, -1.1, -12.121);
+        new BasicLikelihoodTestProvider(-9.7, -15.0, -12.121);
+        new BasicLikelihoodTestProvider(-1.1, -2000.2, -12.121);
+        new BasicLikelihoodTestProvider(-1000.1, -2.2, -12.121);
+        new BasicLikelihoodTestProvider(0, 0, -12.121);
+        new BasicLikelihoodTestProvider(-1.1, 0, -12.121);
+        new BasicLikelihoodTestProvider(0, -2.2, -12.121);
+        new BasicLikelihoodTestProvider(-100.1, -200.2, -12.121);
+
+        return BasicLikelihoodTestProvider.getTests(BasicLikelihoodTestProvider.class);
+    }
+
+    @Test(dataProvider = "BasicLikelihoodTestProvider", enabled = true)
+    public void testOneReadWithTwoOrThreeHaplotypes(BasicLikelihoodTestProvider cfg) {
+        double[][] calculatedMatrix = cfg.calcDiploidHaplotypeMatrix();
+        double[][] expectedMatrix = cfg.expectedDiploidHaplotypeMatrix();
+        logger.warn(String.format("Test: %s", cfg.toString()));
+        Assert.assertTrue(compareDoubleArrays(calculatedMatrix, expectedMatrix));
+    }
+
+    /**
+     * Private function to compare 2d arrays
+     */
+    private boolean compareDoubleArrays(double[][] b1, double[][] b2) {
+        if( b1.length != b2.length ) {
+            return false; // sanity check
+        }
+
+        for( int i=0; i < b1.length; i++ ){
+            if( b1[i].length != b2[i].length) {
+                return false; // sanity check
+            }
+            for( int j=0; j < b1.length; j++ ){
+                if ( MathUtils.compareDoubles(b1[i][j], b2[i][j]) != 0 && !Double.isInfinite(b1[i][j]) && !Double.isInfinite(b2[i][j]))
+                    return false;
+            }
+        }
+        return true;
+    }
+}
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java
@ -0,0 +1,257 @@
+package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 3/27/12
+ */
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.Haplotype;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.jgrapht.graph.DefaultDirectedGraph;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.*;
+
+public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
+
+
+    private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
+        public byte[] sequence;
+        public int KMER_LENGTH;
+
+        public MergeNodesWithNoVariationTestProvider(String seq, int kmer) {
+            super(MergeNodesWithNoVariationTestProvider.class, String.format("Merge nodes with no variation test. kmer = %d, seq = %s", kmer, seq));
+            sequence = seq.getBytes();
+            KMER_LENGTH = kmer;
+        }
+
+        public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph() {
+            DeBruijnVertex v = new DeBruijnVertex(sequence, 0);
+            DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+            graph.addVertex(v);
+            return graph;
+        }
+
+        public DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> calcGraph() {
+
+            DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+            final int kmersInSequence = sequence.length - KMER_LENGTH + 1;
+            for (int i = 0; i < kmersInSequence - 1; i++) {
+                // get the kmers
+                final byte[] kmer1 = new byte[KMER_LENGTH];
+                System.arraycopy(sequence, i, kmer1, 0, KMER_LENGTH);
+                final byte[] kmer2 = new byte[KMER_LENGTH];
+                System.arraycopy(sequence, i+1, kmer2, 0, KMER_LENGTH);
+
+                SimpleDeBruijnAssembler.addKmersToGraph(graph, kmer1, kmer2, false);
+            }
+            SimpleDeBruijnAssembler.mergeNodes(graph);
+            return graph;
+        }
+    }
+
+    @DataProvider(name = "MergeNodesWithNoVariationTestProvider")
+    public Object[][] makeMergeNodesWithNoVariationTests() {
+        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 3);
+        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 4);
+        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 5);
+        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 6);
+        new MergeNodesWithNoVariationTestProvider("GGTTAACC", 7);
+        new MergeNodesWithNoVariationTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", 6);
+        new MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 66);
+        new MergeNodesWithNoVariationTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 76);
+
+        return MergeNodesWithNoVariationTestProvider.getTests(MergeNodesWithNoVariationTestProvider.class);
+    }
+
+    @Test(dataProvider = "MergeNodesWithNoVariationTestProvider", enabled = true)
+    public void testMergeNodesWithNoVariation(MergeNodesWithNoVariationTestProvider cfg) {
+        logger.warn(String.format("Test: %s", cfg.toString()));
+        Assert.assertTrue(graphEquals(cfg.calcGraph(), cfg.expectedGraph()));
+    }
+
+    @Test(enabled = true)
+    public void testPruneGraph() {
+        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+
+        DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0);
+        DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0);
+        DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0);
+        DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0);
+        DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0);
+        DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0);
+
+        graph.addVertex(v);
+        graph.addVertex(v2);
+        graph.addVertex(v3);
+        graph.addVertex(v4);
+        graph.addVertex(v5);
+        graph.addVertex(v6);
+        graph.addEdge(v, v2, new DeBruijnEdge(false, 1));
+        graph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
+        graph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
+        graph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
+        graph.addEdge(v5, v6, new DeBruijnEdge(false, 2));
+
+        expectedGraph.addVertex(v2);
+        expectedGraph.addVertex(v3);
+        expectedGraph.addVertex(v4);
+        expectedGraph.addVertex(v5);
+        expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
+        expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
+        expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
+
+        SimpleDeBruijnAssembler.pruneGraph(graph, 2);
+
+        Assert.assertTrue(graphEquals(graph, expectedGraph));
+
+        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+
+        graph.addVertex(v);
+        graph.addVertex(v2);
+        graph.addVertex(v3);
+        graph.addVertex(v4);
+        graph.addVertex(v5);
+        graph.addVertex(v6);
+        graph.addEdge(v, v2, new DeBruijnEdge(true, 1));
+        graph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
+        graph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
+        graph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
+
+        expectedGraph.addVertex(v);
+        expectedGraph.addVertex(v2);
+        expectedGraph.addVertex(v3);
+        expectedGraph.addVertex(v4);
+        expectedGraph.addVertex(v5);
+        expectedGraph.addEdge(v, v2, new DeBruijnEdge(true, 1));
+        expectedGraph.addEdge(v2, v3, new DeBruijnEdge(false, 3));
+        expectedGraph.addEdge(v3, v4, new DeBruijnEdge(false, 5));
+        expectedGraph.addEdge(v4, v5, new DeBruijnEdge(false, 3));
+
+        SimpleDeBruijnAssembler.pruneGraph(graph, 2);
+
+        Assert.assertTrue(graphEquals(graph, expectedGraph));
+    }
+
+    @Test(enabled = true)
+    public void testEliminateNonRefPaths() {
+        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+        DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+
+        DeBruijnVertex v = new DeBruijnVertex("ATGG".getBytes(), 0);
+        DeBruijnVertex v2 = new DeBruijnVertex("ATGGA".getBytes(), 0);
+        DeBruijnVertex v3 = new DeBruijnVertex("ATGGT".getBytes(), 0);
+        DeBruijnVertex v4 = new DeBruijnVertex("ATGGG".getBytes(), 0);
+        DeBruijnVertex v5 = new DeBruijnVertex("ATGGC".getBytes(), 0);
+        DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC".getBytes(), 0);
+        
+        graph.addVertex(v);
+        graph.addVertex(v2);
+        graph.addVertex(v3);
+        graph.addVertex(v4);
+        graph.addVertex(v5);
+        graph.addVertex(v6);
+        graph.addEdge(v, v2, new DeBruijnEdge(false));
+        graph.addEdge(v2, v3, new DeBruijnEdge(true));
+        graph.addEdge(v3, v4, new DeBruijnEdge(true));
+        graph.addEdge(v4, v5, new DeBruijnEdge(true));
+        graph.addEdge(v5, v6, new DeBruijnEdge(false));
+
+        expectedGraph.addVertex(v2);
+        expectedGraph.addVertex(v3);
+        expectedGraph.addVertex(v4);
+        expectedGraph.addVertex(v5);
+        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
+        expectedGraph.addEdge(v3, v4, new DeBruijnEdge());
+        expectedGraph.addEdge(v4, v5, new DeBruijnEdge());
+
+        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
+
+        Assert.assertTrue(graphEquals(graph, expectedGraph));
+
+        
+        
+        
+        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+
+        graph.addVertex(v);
+        graph.addVertex(v2);
+        graph.addVertex(v3);
+        graph.addVertex(v4);
+        graph.addVertex(v5);
+        graph.addVertex(v6);
+        graph.addEdge(v, v2, new DeBruijnEdge(true));
+        graph.addEdge(v2, v3, new DeBruijnEdge(true));
+        graph.addEdge(v4, v5, new DeBruijnEdge(false));
+        graph.addEdge(v5, v6, new DeBruijnEdge(false));
+
+        expectedGraph.addVertex(v);
+        expectedGraph.addVertex(v2);
+        expectedGraph.addVertex(v3);
+        expectedGraph.addEdge(v, v2, new DeBruijnEdge());
+        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
+
+        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
+
+        Assert.assertTrue(graphEquals(graph, expectedGraph));
+
+
+
+        graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+        expectedGraph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
+
+        graph.addVertex(v);
+        graph.addVertex(v2);
+        graph.addVertex(v3);
+        graph.addVertex(v4);
+        graph.addVertex(v5);
+        graph.addVertex(v6);
+        graph.addEdge(v, v2, new DeBruijnEdge(true));
+        graph.addEdge(v2, v3, new DeBruijnEdge(true));
+        graph.addEdge(v4, v5, new DeBruijnEdge(false));
+        graph.addEdge(v5, v6, new DeBruijnEdge(false));
+        graph.addEdge(v4, v2, new DeBruijnEdge(false));
+
+        expectedGraph.addVertex(v);
+        expectedGraph.addVertex(v2);
+        expectedGraph.addVertex(v3);
+        expectedGraph.addEdge(v, v2, new DeBruijnEdge());
+        expectedGraph.addEdge(v2, v3, new DeBruijnEdge());
+
+        SimpleDeBruijnAssembler.eliminateNonRefPaths(graph);
+
+        Assert.assertTrue(graphEquals(graph, expectedGraph));
+    }
+
+    private boolean graphEquals(DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g1, DefaultDirectedGraph<DeBruijnVertex,DeBruijnEdge> g2) {
+        if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) {
+            return false;
+        }
+        for( DeBruijnEdge e1 : g1.edgeSet() ) {
+            boolean found = false;
+            for( DeBruijnEdge e2 : g2.edgeSet() ) {
+                if( e1.equals(g1, e2, g2) ) { found = true; break; }
+            }
+            if( !found ) { return false; }
+        }
+        for( DeBruijnEdge e2 : g2.edgeSet() ) {
+            boolean found = false;
+            for( DeBruijnEdge e1 : g1.edgeSet() ) {
+                if( e2.equals(g2, e1, g1) ) { found = true; break; }
+            }
+            if( !found ) { return false; }
+        }
+        return true;
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;

+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
 import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -15,7 +16,7 @@ import java.util.*;
 * The u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
 * Note that the base quality rank sum test can not be calculated for homozygous sites.
 */
-public class BaseQualityRankSumTest extends RankSumTest {
+public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
    public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java
@ -18,7 +18,7 @@ import java.util.*;
 * Date: 6/28/12
 */

-public class ClippingRankSumTest /*extends RankSumTest*/ {
+public class ClippingRankSumTest extends RankSumTest {

    public List<String> getKeyNames() { return Arrays.asList("ClippingRankSum"); }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;

+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
 import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -16,7 +17,7 @@ import java.util.*;
 * The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
 * Note that the mapping quality rank sum test can not be calculated for homozygous sites.
 */
-public class MappingQualityRankSumTest extends RankSumTest {
+public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation {

    public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@ -28,7 +28,7 @@ import java.util.Map;
 /**
 * Abstract root for all RankSum based annotations
 */
-public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
+public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
    static final double INDEL_LIKELIHOOD_THRESH = 0.1;
    static final boolean DEBUG = false;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
 import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
 import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -21,7 +22,7 @@ import java.util.*;
 * The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
 * Note that the read position rank sum test can not be calculated for homozygous sites.
 */
-public class ReadPosRankSumTest extends RankSumTest {
+public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {

    public List<String> getKeyNames() {
        return Arrays.asList("ReadPosRankSum");