Merge pull request #559 from broadinstitute/vrr_assembly_graph_edge_info_revise

Improved the criteria for selecting the best haplotypes from the assembly graph.
This commit is contained in:
Ryan Poplin 2014-03-17 09:27:19 -04:00
commit c1b4390691
27 changed files with 802 additions and 173 deletions

View File

@ -213,16 +213,18 @@ public abstract class LocalAssemblyEngine {
final Map<SeqGraph,AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
returnHaplotypes.add( refHaplotype );
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
final ArrayList<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
for( final SeqGraph graph : graphs ) {
final SeqVertex source = graph.getReferenceSourceVertex();
final SeqVertex sink = graph.getReferenceSinkVertex();
if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink);
finders.add(haplotypeFinder);
final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
while (bestHaplotypes.hasNext()) {
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
final Haplotype h = kBestHaplotype.haplotype();
@ -256,9 +258,19 @@ public abstract class LocalAssemblyEngine {
}
}
if ( returnHaplotypes.size() < returnHaplotypes.size() )
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
// Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as
// the first returned by any finder.
if (!returnHaplotypes.contains(refHaplotype)) {
double refScore = Double.NaN;
for (final KBestHaplotypeFinder finder : finders) {
final double candidate = finder.score(refHaplotype);
if (Double.isNaN(candidate)) continue;
refScore = candidate;
break;
}
refHaplotype.setScore(refScore);
returnHaplotypes.add(refHaplotype);
}
if( debug ) {
if( returnHaplotypes.size() > 1 ) {

View File

@ -45,21 +45,21 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.ArrayList;
import java.util.Collection;
import java.util.PriorityQueue;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
class AggregatedSubHaplotypeFinder<F extends KBestSubHaplotypeFinder> implements KBestSubHaplotypeFinder {
/**
* Collection of subFinders that provided the actual solutions.
*/
private final Collection<? extends KBestSubHaplotypeFinder> subFinders;
protected final Collection<F> subFinders;
/**
* Flag indicating whether the sub-finders have been processed or not.
@ -89,17 +89,53 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
* Creates a new aggregated sub-haplotype finder given its sub-finders.
* @param finders set of sub-finders.
*/
public AggregatedSubHaplotypeFinder(final Collection<? extends KBestSubHaplotypeFinder> finders) {
public AggregatedSubHaplotypeFinder(final Collection<F> finders) {
if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
this.subFinders = finders;
}
@Override
public String id() {
final StringBuilder resultBuilder = new StringBuilder();
for (final KBestSubHaplotypeFinder subFinder : subFinders)
resultBuilder.append(subFinder.id());
return resultBuilder.toString();
}
@Override
public String label() {
return "&lt;OR&gt;";
}
@Override
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
final int subFinderCount = subFinders.size();
final String edgeCost = String.format("%.2f",-Math.log10((double) subFinderCount));
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinderCount);
for (final KBestSubHaplotypeFinder subFinder : subFinders)
result.add(new Pair<>(subFinder,edgeCost));
return result;
}
@Override
public int getCount() {
processSubFindersIfNeeded();
return count;
}
@Override
public double score(final byte[] bases, final int offset, final int length) {
    // Argument validation per the KBestSubHaplotypeFinder contract.
    if (bases == null) {
        throw new IllegalArgumentException("bases cannot be null");
    }
    if (offset < 0) {
        throw new IllegalArgumentException("the offset cannot be negative");
    }
    if (length < 0) {
        throw new IllegalArgumentException("the length cannot be negative");
    }
    if (offset + length > bases.length) {
        throw new IllegalArgumentException("the offset and length go beyond the array size");
    }
    // Delegate to each sub-finder in turn; the first one that recognizes the
    // query sequence provides the score. NaN means no sub-finder matched.
    double result = Double.NaN;
    for (final KBestSubHaplotypeFinder delegate : subFinders) {
        final double candidate = delegate.score(bases, offset, length);
        if (!Double.isNaN(candidate)) {
            result = candidate;
            break;
        }
    }
    return result;
}
private void processSubFindersIfNeeded() {
if (processedSubFinders) return;
@ -144,6 +180,11 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
return rankedSubHaplotype.get(k);
}
@Override
public boolean isReference() {
return false;
}
/**
* Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
*/
@ -167,7 +208,7 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
}
@Override
public int score() {
public double score() {
return result.score();
}

View File

@ -52,6 +52,7 @@ import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.jgrapht.EdgeFactory;
import org.jgrapht.alg.CycleDetector;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.File;
@ -146,6 +147,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
return set;
}
/**
 * Convert this kmer graph to a simple sequence graph.
 *
 * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
 * graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence.
 *
 * @return a newly allocated SequenceGraph
 */
public SeqGraph convertToSequenceGraph() {
    final SeqGraph result = new SeqGraph(kmerSize);
    final Map<V, SeqVertex> kmerToSeqVertex = new HashMap<>();

    // First pass: mirror every kmer vertex as a sequence vertex, keeping the mapping
    // so edges can be reconnected afterwards. Source vertices carry their full sequence.
    for ( final V kmerVertex : vertexSet() ) {
        final SeqVertex seqVertex = new SeqVertex(kmerVertex.getAdditionalSequence(isSource(kmerVertex)));
        seqVertex.setAdditionalInfo(kmerVertex.additionalInfo());
        kmerToSeqVertex.put(kmerVertex, seqVertex);
        result.addVertex(seqVertex);
    }

    // Second pass: replicate the edge structure between the mirrored vertices,
    // preserving reference membership and multiplicity.
    for ( final E edge : edgeSet() ) {
        final SeqVertex from = kmerToSeqVertex.get(getEdgeSource(edge));
        final SeqVertex to = kmerToSeqVertex.get(getEdgeTarget(edge));
        result.addEdge(from, to, new BaseEdge(edge.isRef(), edge.getMultiplicity()));
    }

    return result;
}
/**
* Pull out the additional sequence implied by traversing this node in the graph
* @param v the vertex from which to pull out the additional base sequence
@ -712,4 +746,13 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
if (!containsVertex(vertex)) return false;
return true;
}
/**
 * Checks for the presence of directed cycles in the graph.
 *
 * @return {@code true} if the graph has cycles, {@code false} otherwise.
 */
public boolean hasCycles() {
    final CycleDetector<V, E> detector = new CycleDetector<>(this);
    return detector.detectCycles();
}
}

View File

@ -122,7 +122,7 @@ public class CommonSuffixSplitter {
} else {
incomingTarget = prefixV;
graph.addVertex(prefixV);
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0));
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1));
edgesToRemove.add(out);
}

View File

@ -45,6 +45,11 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Collections;
import java.util.Set;
/**
* Represents a trivial k-best sub haplotype finder with no solutions.
*
@ -65,6 +70,21 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
protected DeadEndKBestSubHaplotypeFinder() {
}
@Override
public String id() {
return "<DEAD>";
}
@Override
public String label() {
return "&lt;DEAD&gt;";
}
@Override
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
return Collections.emptySet();
}
@Override
public int getCount() {
return 0;
@ -77,4 +97,18 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
else
throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count");
}
@Override
public boolean isReference() {
return false;
}
/**
 * Always reports no score: a dead-end finder reaches no sink, so no haplotype
 * sequence is accessible through it.
 */
@Override
public double score(final byte[] bases, final int offset, final int length) {
    // Validate arguments per the KBestSubHaplotypeFinder contract even though
    // the result is unconditionally "no score".
    if (bases == null) throw new IllegalArgumentException("bases cannot be null");
    if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
    if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
    if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
    return Double.NaN;
}
}

View File

@ -45,6 +45,12 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Collections;
import java.util.Set;
/**
* Trivial k-best sub-haplotype finder where the source and sink vertex are the same one.
*
@ -67,6 +73,21 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
}
@Override
public String id() {
return "v" + singleHaplotypePath.head().getId();
}
@Override
public String label() {
return singleHaplotypePath.head().getSequenceString();
}
@Override
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
return Collections.emptySet();
}
@Override
public int getCount() {
return 1;
@ -81,6 +102,24 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
return singleHaplotypePath;
}
@Override
public boolean isReference() {
return singleHaplotypePath.isReference();
}
/**
 * Scores a query sequence against this single-vertex path: a match scores 0
 * (log10 scale), any other query has no score ({@link Double#NaN}).
 */
@Override
public double score(final byte[] bases, final int offset, final int length) {
    // Validate the query range per the KBestSubHaplotypeFinder contract.
    if (bases == null) throw new IllegalArgumentException("bases cannot be null");
    if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
    if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
    if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
    final byte[] vertexBases = singleHaplotypePath.head().getSequence();
    // The query can only match if it is exactly the vertex's own sequence.
    if (length != vertexBases.length)
        return Double.NaN;
    else
        return Utils.equalRange(bases, offset, vertexBases, 0, length)? 0 : Double.NaN;
}
/**
* Custom extension of {@link KBestHaplotype} that implements the single solution behaviour.
*/
@ -120,7 +159,7 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
}
@Override
public int score() {
public double score() {
return 0;
}

View File

@ -68,7 +68,7 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
*
* @return 0 or greater.
*/
public abstract int score();
public abstract double score();
/**
* Indicates whether this result is the reference haplotype.
@ -122,6 +122,8 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
public Haplotype haplotype() {
if (haplotype != null) return haplotype;
haplotype = new Haplotype(bases(),isReference());
if (score() > 0)
throw new IllegalStateException("score cannot be greater than 0: " + score());
haplotype.setScore(score());
return haplotype;
}
@ -152,7 +154,35 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
*/
public int compareTo(final KBestHaplotype other) {
if (other == null) throw new IllegalArgumentException("the other object cannot be null");
return - 1 * (score() - other.score());
return - Double.compare(score(), other.score());
}
@Override
public int hashCode() {
return haplotype().hashCode();
}
@Override
public boolean equals(final Object other) {
return other == null ? false: (other instanceof KBestHaplotype ? equals((KBestHaplotype)other) : false);
}
@Override
public String toString() {
return haplotype().toString() + " Score = " + score();
}
/**
* Checks whether both solutions are equal.
* <p>
* Both solutions are considered equal when the underlying haplotypes are equal. The path on the respective
graph might differ though.
* </p>
*
* @return {@code true} iff both haplotypes are the same (considering the ref state).
*/
protected boolean equals(final KBestHaplotype other) {
return haplotype().equals(other.haplotype(),false);
}
/**

View File

@ -45,8 +45,13 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
import org.jgrapht.alg.CycleDetector;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;
/**
@ -233,7 +238,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
}
@Override
public KBestHaplotype get(int index) {
public KBestHaplotype get(final int index) {
if (index < 0 || index >= size())
throw new IndexOutOfBoundsException();
return topFinder.getKBest(index);
@ -305,28 +310,28 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
/**
* Creates a finder from a vertex.
*
* @param source the source vertex for the finder.
* @param vertex the source vertex for the finder.
*
* @return never {@code null}, perhaps a finder that returns no haplotypes though.
*/
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) {
KBestSubHaplotypeFinder node = finderByVertex.get(source);
if (node == null) {
if (sinks.contains(source))
node = new EmptyPathHaplotypeFinderNode(graph,source);
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex vertex) {
KBestSubHaplotypeFinder finder = finderByVertex.get(vertex);
if (finder == null) {
if (sinks.contains(vertex))
finder = new EmptyPathHaplotypeFinderNode(graph,vertex);
else {
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(source);
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(vertex);
if (outgoingEdges.isEmpty())
node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
finder = DeadEndKBestSubHaplotypeFinder.INSTANCE;
else {
final Map<BaseEdge,KBestSubHaplotypeFinder> undeadChildren = createChildrenFinders(outgoingEdges);
node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
new RecursiveSubHaplotypeFinder(source,undeadChildren);
finder = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
new RecursiveSubHaplotypeFinder(graph,vertex,undeadChildren);
}
}
finderByVertex.put(source, node);
finderByVertex.put(vertex, finder);
}
return node;
return finder;
}
/**
@ -340,7 +345,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
* @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
* finder.
*/
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(Set<BaseEdge> baseEdges) {
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(final Set<BaseEdge> baseEdges) {
final Map<BaseEdge,KBestSubHaplotypeFinder> result = new LinkedHashMap<>(baseEdges.size());
for (final BaseEdge edge : baseEdges) {
final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
@ -349,4 +354,156 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
}
return result;
}
/**
 * Print a DOT representation of search graph.
 *
 * @param out character stream printer where to print the DOT representation to.
 *
 * @throws IllegalArgumentException if {@code out} is {@code null}.
 */
public void printDOT(final PrintWriter out) {
    if (out == null)
        throw new IllegalArgumentException("the out writer cannot be null");
    out.println("digraph {");
    out.println(" rankdir = LR");
    out.println(" node [shape=box, margin=0.01]");
    out.println(" subgraph cluster_dummy { style = invis; x [label=\"\",shape=none,margin=0] }");
    // Reference-path finders are collected into their own cluster so they stand
    // out visually; all other finders are written directly to the output.
    final StringBuilder refCluster = new StringBuilder(1000);
    refCluster.append(" subgraph cluster_ref {\n");
    refCluster.append(" node [penwidth=2]\n");
    for (final KBestSubHaplotypeFinder vertexFinder : finderByVertex.values()) {
        final String nodeLine = String.format(" %s [label=<%s>]", vertexFinder.id(), vertexFinder.label());
        if (vertexFinder.isReference())
            refCluster.append(" ").append(nodeLine).append('\n');
        else
            out.println(nodeLine);
    }
    refCluster.append(" }");
    out.println(refCluster.toString());
    // Emit one labeled edge per finder -> sub-finder relationship.
    for (final KBestSubHaplotypeFinder vertexFinder : finderByVertex.values()) {
        for (final Pair<? extends KBestSubHaplotypeFinder, String> labeledSub : vertexFinder.subFinderLabels()) {
            out.println(String.format(" %s -> %s [label=%s]", vertexFinder.id(), labeledSub.getFirst().id(), labeledSub.getSecond()));
        }
    }
    out.println("}");
}
/**
 * Print a DOT representation of search graph.
 *
 * @param file file where to print the DOT representation to.
 *
 * @throws IllegalArgumentException if {@code file} is {@code null}.
 * @throws FileNotFoundException if {@code file} cannot be created or written.
 * @throws IllegalStateException if there was some trouble when writing the DOT representation.
 */
public void printDOT(final File file) throws FileNotFoundException {
    if (file == null)
        throw new IllegalArgumentException("the output file cannot be null");
    // try-with-resources guarantees the writer is closed on every path; the
    // previous version leaked the stream when checkError() triggered the
    // IllegalStateException before reaching close().
    try (final PrintWriter out = new PrintWriter(file)) {
        printDOT(out);
        if (out.checkError())
            throw new IllegalStateException("error occurred while writing k-best haplotype search graph into file '"
                    + file.getAbsolutePath() + "'");
    }
}
/**
 * Print a DOT representation of search graph.
 *
 * @param fileName name of the file where to print the DOT representation to.
 *
 * @throws IllegalArgumentException if {@code fileName} is {@code null}.
 * @throws FileNotFoundException if the file named {@code fileName} cannot be created or written.
 * @throws IllegalStateException if there was some trouble when writing the DOT representation.
 */
@SuppressWarnings("unused") // Available for debugging purposes.
public void printDOTFile(final String fileName) throws FileNotFoundException {
    printDOT(new File(fileName));
}
/**
 * Get the score of a given sequence of bases.
 *
 * @param bases the base sequence.
 *
 * @throws IllegalArgumentException if {@code bases} is {@code null}.
 *
 * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
 *   through this finder.
 */
public double score(final byte[] bases) {
    // Validate here so callers get a clear IllegalArgumentException (matching the
    // other score(...) overloads' convention) instead of an NPE from bases.length.
    if (bases == null)
        throw new IllegalArgumentException("bases cannot be null");
    return topFinder.score(bases,0,bases.length);
}
/**
 * Get the score of a given haplotype.
 *
 * @param haplotype the haplotype.
 *
 * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
 *   through this finder.
 */
public double score(final Haplotype haplotype) {
    // Delegates to the byte[]-based overload on the haplotype's base sequence.
    return score(haplotype.getBases());
}
/**
 * Returns a unique list of haplotypes solutions.
 * <p>
 * The result will not contain more than one haplotype with the same base sequence. The solution of the best
 * score is returned.
 * </p>
 * <p>
 * This makes sense when there are more than one possible path through the graph to create the same haplotype.
 * </p>
 * <p>
 * The resulting list is sorted by the score with more likely haplotype search results first.
 * </p>
 *
 * @param maxSize maximum number of unique results to return.
 *
 * @throws IllegalArgumentException if {@code maxSize} is negative.
 *
 * @return never {@code null}, perhaps an empty list.
 */
public List<KBestHaplotype> unique(final int maxSize) {
    if (maxSize < 0) throw new IllegalArgumentException("maxSize cannot be negative");
    final int requiredCapacity = Math.min(maxSize,size());
    final Set<Haplotype> haplotypes = new HashSet<>(requiredCapacity);
    final List<KBestHaplotype> result = new ArrayList<>(requiredCapacity);
    // Iteration order is best-score-first, so the first occurrence of each distinct
    // haplotype sequence is its best-scoring solution.
    for (final KBestHaplotype kbh : this) {
        // BUGFIX: the previous version tracked the count in a local that was never
        // incremented, so the maxSize cap was never enforced (and for maxSize == 0
        // one element was added before breaking). Check the cap before adding.
        if (result.size() >= maxSize) break;
        if (haplotypes.add(kbh.haplotype())) {
            result.add(kbh);
        }
    }
    return result;
}
/**
 * Returns a unique list of haplotypes solutions.
 *
 * <p>
 * The result will not contain more than one haplotype with the same base sequence. The solution of the best
 * score is returned.
 * </p>
 * <p>
 * This makes sense when there are more than one possible path through the graph to create the same haplotype.
 * </p>
 * <p>
 * The resulting list is sorted by the score with more likely haplotype search results first.
 * </p>
 *
 * @return never {@code null}, perhaps an empty list.
 */
public List<KBestHaplotype> unique() {
    // Equivalent to unique(maxSize) with no effective cap on the result size.
    return unique(size());
}
}

View File

@ -45,6 +45,10 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Set;
/**
* Common interface for K-Best sub-haplotype finders.
*
@ -52,6 +56,29 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
*/
interface KBestSubHaplotypeFinder {
/**
* Return an unique id for this sub-haplotype finder to be used when outputting diagrams.
*
* @return never {@code null}.
*/
public String id();
/**
* Returns a label with human readable representation of this finder.
*
* <p>This is used when generating a diagram to illustrate the search space and costs</p>
*
* @return never {@code null}.
*/
public String label();
/**
* Returns the set of subfinder from this finder together with a label for the connection with the current finder.
*
* <p>The label is used when generating a diagram to illustrate the search space and costs</p>
*/
public Set<Pair<? extends KBestSubHaplotypeFinder,String>> subFinderLabels();
/**
* Returns the total number of possible sub-haplotypes.
* @return 0 or greater.
@ -67,5 +94,22 @@ interface KBestSubHaplotypeFinder {
*
* @return never {@code null}.
*/
public abstract KBestHaplotype getKBest(int k);
public KBestHaplotype getKBest(int k);
/**
* Checks whether the top vertex for this finder is a reference haplotype vertex.
*
* @return {@code true} iff the top vertex for this finder is a reference vertex.
*/
public boolean isReference();
/**
* Calculate the score of a sequence of bases.
*
* @param bases array containing the query base sequence.
* @param offset first position of the query base sequence in {@code bases} .
* @param length length of the query base sequence.
* @return {@link Double#NaN} if there is no score for this sequence, otherwise a valid score value.
*/
public double score(byte[] bases, int offset, int length);
}

View File

@ -49,20 +49,24 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.PriorityQueue;
/**
* edge class for connecting nodes in the graph that tracks some per-sample information
*
* Edge class for connecting nodes in the graph that tracks some per-sample information.
* <p>
* This class extends BaseEdge with the additional functionality of tracking the maximum
* multiplicity seen within any single sample. The workflow for using this class is:
*
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
* e.getPruningMultiplicity() // = 1
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
* e.getPruningMultiplicity() // = 2
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
* e.getPruningMultiplicity() // = 2
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
* e.getPruningMultiplicity() // = 3
* </p>
* <pre>
* {@code
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
* e.getPruningMultiplicity() // = 1
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
* e.getPruningMultiplicity() // = 2
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
* e.getPruningMultiplicity() // = 2
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
* e.getPruningMultiplicity() // = 3
* }
* </pre>
*/
public class MultiSampleEdge extends BaseEdge {
private int currentSingleSampleMultiplicity;

View File

@ -45,9 +45,10 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* General recursive sub-haplotype finder.
@ -67,7 +68,11 @@ import java.util.Map;
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder<RecursiveSubHaplotypeFinder.EdgeSubHaplotypeFinder> {
private final SeqVertex vertex;
private final boolean isReference;
/**
* Creates a recursive sub-haplotype finder give the target graph, first vertex and all possible outgoing edges
@ -80,20 +85,83 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
*/
public RecursiveSubHaplotypeFinder(final SeqVertex vertex,
public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex,
final Map<BaseEdge, KBestSubHaplotypeFinder> children) {
super(createChildFinderCollection(vertex, children));
this.vertex = vertex;
this.isReference = graph.isReferenceNode(vertex);
}
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex, final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
/**
* Wraps the descendant vertices finders in order to take advantage of the {@link AggregatedSubHaplotypeFinder}
* common code.
* <p>
* Automatically calibrates the edge score (cost) so that it takes into account the total across all edges.
* </p>
*
* @param vertex the parent vertex.
* @param finders the child vertices indexed by the connecting edge.
* @return never {@code null} but potentially an empty collection if there is child returning some sub-haplotype
* solution.
*/
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex,
final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
final Collection<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet())
result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()));
final ArrayList<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet()) {
final EdgeSubHaplotypeFinder subFinder = new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue());
if (subFinder.getCount() == 0) continue;
result.add(subFinder);
}
if (result.size() == 0)
return Collections.emptySet();
else if (result.size() == 1) // no calibration needed, by default edgeScore is 0.
return Collections.singleton(result.get(0));
else {
double totalEdgeMultiplicityAcrossEdges = 0;
for (final EdgeSubHaplotypeFinder finder : result)
totalEdgeMultiplicityAcrossEdges += Math.max(0.5,finder.edge.getMultiplicity());
final double log10TotalEdgeMultiplicityAcrossEdges = Math.log10(totalEdgeMultiplicityAcrossEdges);
for (final EdgeSubHaplotypeFinder finder : result)
finder.calibrateEdgeScore(log10TotalEdgeMultiplicityAcrossEdges);
return result;
}
}
@Override
public boolean isReference() {
return isReference;
}
@Override
public String label() {
return vertex.getSequenceString();
}
@Override
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinders.size());
for (final EdgeSubHaplotypeFinder subFinder : subFinders)
result.add(new Pair<>(subFinder,simplifyZeros(String.format("%.4f", subFinder.edgeScore))));
return result;
}
private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
/**
* Removes zeros decimal positions from edge-labels.
*
* @param edgeLabel the original label to reformat.
* @return never {@code null}, the reformatted label.
*/
private String simplifyZeros(final String edgeLabel) {
if (edgeLabel.equals("0.000") || edgeLabel.equals("-0.000") )
return "0.";
int i = edgeLabel.length() - 1;
while (edgeLabel.charAt(i) == '0')
i--;
return (i == edgeLabel.length() - 1) ? edgeLabel : edgeLabel.substring(0,i);
}
protected static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
private final KBestSubHaplotypeFinder childFinder;
@ -101,10 +169,32 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
private final BaseEdge edge;
private double edgeScore = 0;
private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
this.childFinder = childFinder;
this.edge = edge;
this.vertex = vertex;
this.edgeScore = 0;
}
private void calibrateEdgeScore(final double log10TotalMultiplicityAcrossOutgoingEdges) {
edgeScore = Math.log10(Math.max(edge.getMultiplicity(),0.5)) - log10TotalMultiplicityAcrossOutgoingEdges;
}
@Override
public String id() {
return childFinder.id();
}
@Override
public String label() {
return childFinder.label();
}
@Override
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
return childFinder.subFinderLabels();
}
@Override
@ -114,8 +204,31 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
@Override
public KBestHaplotype getKBest(int k) {
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k));
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k),edgeScore);
}
@Override
public boolean isReference() {
return childFinder.isReference();
}
@Override
public double score(final byte[] bases, final int offset, final int length) {
if (length == 0)
return 0;
final byte[] vertexSequence = vertex.getSequence();
if (length < vertexSequence.length) // query is not long enough to have any score.
return Double.NaN;
else if (!Utils.equalRange(vertexSequence,0,bases,offset,vertexSequence.length))
return Double.NaN;
else
return edgeScore + childFinder.score(bases,offset + vertexSequence.length,length - vertexSequence.length);
}
}
@Override
public String id() {
return "v" + vertex.getId();
}
/**
@ -129,13 +242,14 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
*/
private static class ChildKBestSubHaplotype extends KBestHaplotype {
private final int score;
private final double score;
private final KBestHaplotype child;
private final SeqVertex vertex;
private final boolean isReference;
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) {
this.score = edge.getMultiplicity() + child.score();
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child, final double edgeScore) {
this.score = edgeScore + child.score();
this.vertex = vertex;
this.child = child;
this.isReference = edge.isRef() && child.isReference();
@ -147,7 +261,7 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
}
@Override
public int score() {
public double score() {
return score;
}

View File

@ -291,16 +291,9 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
final SeqVertex addedVertex = mergeLinearChainVertices(linearChain);
addVertex(addedVertex);
final Set<BaseEdge> inEdges = incomingEdgesOf(first);
final Set<BaseEdge> outEdges = outgoingEdgesOf(last);
final int nEdges = inEdges.size() + outEdges.size();
int sharedWeightAmongEdges = nEdges == 0 ? 0 : sumEdgeWeightAlongChain(linearChain) / nEdges;
final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy
// update the incoming and outgoing edges to point to the new vertex
for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); }
for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); }
for( final BaseEdge edge : outgoingEdgesOf(last) ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy()); }
for( final BaseEdge edge : incomingEdgesOf(first) ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy()); }
removeAllVertices(linearChain);
return true;
@ -313,29 +306,6 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
return new SeqVertex( seqsCat );
}
/**
* Get the sum of the edge weights on a linear chain of at least 2 elements
*
* @param chain a linear chain of vertices with at least 2 vertices
* @return the sum of the multiplicities along all edges connecting vertices within the chain
*/
@Requires({"chain != null", "chain.size() >= 2"})
private int sumEdgeWeightAlongChain(final LinkedList<SeqVertex> chain) {
int sum = 0;
SeqVertex prev = null;
for ( final SeqVertex v : chain ) {
if ( prev != null ) {
final BaseEdge e = getEdge(prev, v);
if ( e == null ) throw new IllegalStateException("Something wrong with the linear chain, got a null edge between " + prev + " and " + v);
sum += e.getMultiplicity();
}
prev = v;
}
return sum;
}
/**
* Base class for transformation operations that need to iterate over proposed vertices, where
* each proposed vertex is a seed vertex for a potential transformation.

View File

@ -247,12 +247,12 @@ public class SharedVertexSequenceSplitter {
if ( needPrefixNode ) {
outer.addVertex(prefixV);
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 0));
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 1));
}
if ( needSuffixNode ) {
outer.addVertex(suffixV);
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 0));
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 1));
}
if ( topForConnect != null ) {

View File

@ -52,7 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.jgrapht.alg.CycleDetector;
import java.io.File;
import java.util.*;
@ -88,8 +87,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
/**
*
*/
final boolean debugGraphTransformations;
private final boolean debugGraphTransformations;
final byte minBaseQualityToUseInAssembly;
protected boolean increaseCountsBackwards = true;
@ -319,13 +317,6 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
removeAllVertices(verticesToRemove);
}
/**
* @return true if the graph has cycles, false otherwise
*/
public boolean hasCycles() {
return new CycleDetector<>(this).detectCycles();
}
/**
* Does the graph not have enough complexity? We define low complexity as a situation where the number
* of non-unique kmers is more than 20% of the total number of kmers.
@ -419,39 +410,10 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
return counter.getKmersWithCountsAtLeast(2);
}
/**
* Convert this kmer graph to a simple sequence graph.
*
* Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
* graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
*
* @return a newly allocated SequenceGraph
*/
// TODO -- should override base class method
@Override
public SeqGraph convertToSequenceGraph() {
buildGraphIfNecessary();
final SeqGraph seqGraph = new SeqGraph(kmerSize);
final Map<MultiDeBruijnVertex, SeqVertex> vertexMap = new HashMap<>();
// create all of the equivalent seq graph vertices
for ( final MultiDeBruijnVertex dv : vertexSet() ) {
final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
sv.setAdditionalInfo(dv.additionalInfo());
vertexMap.put(dv, sv);
seqGraph.addVertex(sv);
}
// walk through the nodes and connect them to their equivalent seq vertices
for( final MultiSampleEdge e : edgeSet() ) {
final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
//logger.info("Adding edge " + seqInV + " -> " + seqOutV);
seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
}
return seqGraph;
return super.convertToSequenceGraph();
}
private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers,
@ -749,15 +711,15 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
}
private static String pathElementId(final String element) {
final int parentesysPos = element.indexOf('(');
final int openBracketPosition = element.indexOf('(');
if (parentesysPos == -1)
if (openBracketPosition == -1)
return null;
final int closeParentesysPos = element.lastIndexOf(')');
if (closeParentesysPos == -1)
final int closeBracketPosition = element.lastIndexOf(')');
if (closeBracketPosition == -1)
throw new IllegalArgumentException("non-closed id parantesys found in element: " + element);
final String result = element.substring(parentesysPos + 1,closeParentesysPos).trim();
final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim();
if (result.isEmpty())
throw new IllegalArgumentException("empty id found in element: " + element);
return result;

View File

@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"f50e0b35e2240b19b1b8b6dfa0cf9796");
"5ac3bfe1da1d411b52a98ef3debbd318");
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"21e521d51b826450d348e5201684ffe4");
"61972c7c0d378e756f3b4d99aed9d0cf");
}
}

View File

@ -68,8 +68,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});

View File

@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("0998be22d7af4372247f5a0338f9446b"));
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestDBSNPAnnotationWGSGraphBased() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
Arrays.asList("1aeed297a3cb41940d83eac499a2ce07"));
Arrays.asList("eda8f91091fe462205d687ec49fc61e7"));
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
}
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestAggressivePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013"));
Arrays.asList("73c52372a1a80f052ea2b728ee17bf22"));
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
}
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestConservativePcrIndelModelWGS() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
Arrays.asList("dcb38cb9280f2c3059a09d323db1c633"));
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b"));
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
}
@ -298,4 +298,25 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testGraphBasedNoSuchEdgeBugFix", spec);
}
@Test
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e2e6647f7c96e91aeead7301017dc800"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}
@Test
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e"));
spec.disableShadowBCF();
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
}
}

View File

@ -47,7 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -61,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
List<Object[]> tests = new ArrayList<>();
for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, "1f463bf3a06c401006858bc446ecea54"});
tests.add(new Object[]{nct, "fd9324a574f9204f7308fc1af422fdcc"});
}
return tests.toArray(new Object[][]{});

View File

@ -52,10 +52,11 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.*;
public class CommonSuffixMergerUnitTest extends BaseTest {
private final static boolean PRINT_GRAPHS = true;
private final static boolean PRINT_GRAPHS = false;
@DataProvider(name = "CompleteCycleData")
public Object[][] makeCompleteCycleData() {
@ -134,11 +135,35 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
return toUse.toArray(new Object[][]{});
}
/**
* Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score.
*/
private static final Comparator<KBestHaplotype> KBESTHAPLOTYPE_COMPARATOR = new Comparator<KBestHaplotype>() {
/**
* Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score.
*
* @return {@inheritDoc}
*/
@Override
public int compare(final KBestHaplotype o1,final KBestHaplotype o2) {
final int baseCmp = o1.haplotype().getBaseString().compareTo(o2.haplotype().getBaseString());
if (baseCmp != 0)
return baseCmp;
return - Double.compare(o1.score(),o2.score());
}
};
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
final KBestHaplotypeFinder originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
final KBestHaplotypeFinder actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
final List<KBestHaplotype> sortedOriginalKBestHaplotypes = new ArrayList<>(originalKBestHaplotypes);
Collections.sort(sortedOriginalKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
final List<KBestHaplotype> sortedActualKBestHaplotypes = new ArrayList<>(actualKBestHaplotypes);
Collections.sort(sortedActualKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
try {
final Set<String> haplotypes = new HashSet<String>();
final List<KBestHaplotype> originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
final List<KBestHaplotype> actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
for (final KBestHaplotype kbh : originalKBestHaplotypes)
haplotypes.add(new String(kbh.bases()));
@ -148,14 +173,16 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
}
if ( actualKBestHaplotypes.size() == originalKBestHaplotypes.size() ) {
for ( int i = 0; i < originalKBestHaplotypes.size(); i++ ) {
Assert.assertTrue(actualKBestHaplotypes.get(i).haplotype().getBaseString().equals(originalKBestHaplotypes.get(i).haplotype().getBaseString()), "Paths not equal " + actualKBestHaplotypes.get(i).haplotype() + " vs. original " + originalKBestHaplotypes.get(i).haplotype());
}
}
Assert.assertEquals(sortedActualKBestHaplotypes,sortedOriginalKBestHaplotypes);
} catch ( AssertionError e ) {
if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0);
if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0);
try {
if ( PRINT_GRAPHS ) originalKBestHaplotypes.printDOTFile(String.format("%s.original.finder.dot",name));
if ( PRINT_GRAPHS ) actualKBestHaplotypes.printDOTFile(String.format("%s.actual.finder.dot",name));
} catch (IOException e2) {
// do nothing.
}
throw e;
}
}

View File

@ -65,7 +65,6 @@ import java.util.*;
* User: rpoplin
* Date: 1/31/13
*/
public class KBestHaplotypeFinderUnitTest extends BaseTest {
@DataProvider(name = "BasicPathFindingData")
@ -113,11 +112,11 @@ public class KBestHaplotypeFinderUnitTest extends BaseTest {
final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * nEndNodes;
Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths");
int lastScore = Integer.MAX_VALUE;
double lastScore = 0;
for ( final KBestHaplotype kbh : paths ) {
final Path<SeqVertex,BaseEdge> path = kbh.path();
Assert.assertTrue(path.getScore() <= lastScore, "Paths out of order. Path " + path + " has score above previous " + lastScore);
lastScore = path.getScore();
Assert.assertTrue(kbh.score() <= lastScore, "Paths out of order. Path " + path + " has score " + path.getScore() + " above previous " + lastScore);
lastScore = kbh.score();
}
// get the best path, and make sure it's the same as our optimal path overall

View File

@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.testng.Assert;
@ -226,28 +227,34 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest {
}
final Set<String> haplotypes = new HashSet<>();
final List<KBestHaplotype> originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
final KBestHaplotypeFinder originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
for ( final KBestHaplotype path : originalPaths )
haplotypes.add(new String(path.bases()));
final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v);
splitter.split();
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".original.dot"), 0);
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + ".split.dot"), 0);
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".original.dot"), 0);
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".split.dot"), 0);
splitter.updateGraph(top, bot);
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".updated.dot"), 0);
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".updated.dot"), 0);
final List<KBestHaplotype> splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
final KBestHaplotypeFinder splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
for ( final KBestHaplotype path : splitPaths ) {
final String h = new String(path.bases());
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
}
if ( splitPaths.size() == originalPaths.size() ) {
for ( int i = 0; i < originalPaths.size(); i++ ) {
Assert.assertTrue(splitPaths.get(i).path().equalScoreAndSequence(originalPaths.get(i).path()), "Paths not equal " + splitPaths.get(i) + " vs. original " + originalPaths.get(i));
}
}
final List<byte[]> sortedOriginalPaths = new ArrayList<>(originalPaths.size());
for (final KBestHaplotype kbh : originalPaths.unique())
sortedOriginalPaths.add(kbh.bases());
Collections.sort(sortedOriginalPaths, BaseUtils.BASES_COMPARATOR);
final List<byte[]> sortedSplitPaths = new ArrayList<>(splitPaths.size());
for (final KBestHaplotype kbh : splitPaths.unique())
sortedSplitPaths.add(kbh.bases());
Collections.sort(sortedSplitPaths, BaseUtils.BASES_COMPARATOR);
Assert.assertEquals(sortedSplitPaths,sortedOriginalPaths,Utils.join("_", strings) + "_" + hasTop + "_" + hasBot);
}
@DataProvider(name = "MeetsMinSequenceData")

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.Comparator;
/**
* BaseUtils contains some basic utilities for manipulating nucleotides.
@ -589,4 +590,26 @@ public class BaseUtils {
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
}
}
/**
* Lexicographical sorting of base arrays {@link Comparator}.
*/
public static final Comparator<byte[]> BASES_COMPARATOR = new Comparator<byte[]> (){
@Override
public int compare(final byte[] o1,final byte[] o2) {
final int minLength = Math.min(o1.length,o2.length);
for (int i = 0; i < minLength; i++) {
final int cmp = Byte.compare(o1[i],o2[i]);
if (cmp != 0) return cmp;
}
if (o1.length == o2.length)
return 0;
else if (o1.length == minLength)
return -1;
else
return 1;
}
};
}

View File

@ -852,4 +852,34 @@ public class Utils {
return lst;
}
/**
* Compares sections from to byte arrays to verify whether they contain the same values.
*
* @param left first array to compare.
* @param leftOffset first position of the first array to compare.
* @param right second array to compare.
* @param rightOffset first position of the second array to compare.
* @param length number of positions to compare.
*
* @throws IllegalArgumentException if <ul>
* <li>either {@code left} or {@code right} is {@code null} or</li>
* <li>any off the offset or length combine point outside any of the two arrays</li>
* </ul>
* @return {@code true} iff {@code length} is 0 or all the bytes in both ranges are the same two-by-two.
*/
public static boolean equalRange(final byte[] left, final int leftOffset, byte[] right, final int rightOffset, final int length) {
if (left == null) throw new IllegalArgumentException("left cannot be null");
if (right == null) throw new IllegalArgumentException("right cannot be null");
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
if (leftOffset < 0) throw new IllegalArgumentException("left offset cannot be negative");
if (leftOffset + length > left.length) throw new IllegalArgumentException("length goes beyond end of left array");
if (rightOffset < 0) throw new IllegalArgumentException("right offset cannot be negative");
if (rightOffset + length > right.length) throw new IllegalArgumentException("length goes beyond end of right array");
for (int i = 0; i < length; i++)
if (left[leftOffset + i] != right[rightOffset + i])
return false;
return true;
}
}

View File

@ -49,7 +49,7 @@ public class Haplotype extends Allele {
private EventMap eventMap = null;
private Cigar cigar;
private int alignmentStartHapwrtRef;
private double score = 0;
private double score = Double.NaN;
/**
* Main constructor
@ -301,7 +301,7 @@ public class Haplotype extends Allele {
* @return a double, where higher values are better
*/
public double getScore() {
return this.isReference() ? Double.MAX_VALUE : score;
return score;
}
/**
@ -312,7 +312,7 @@ public class Haplotype extends Allele {
* @param score a double, where higher values are better
*/
public void setScore(double score) {
this.score = this.isReference() ? Double.MAX_VALUE : score;
this.score = score;
}
/**

View File

@ -91,6 +91,7 @@ public abstract class BaseTest {
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";

View File

@ -26,9 +26,16 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Random;
public class BaseUtilsUnitTest extends BaseTest {
@ -123,4 +130,50 @@ public class BaseUtilsUnitTest extends BaseTest {
Assert.assertTrue(rcObs.equals(rcExp));
}
@Test(dataProvider="baseComparatorData")
public void testBaseComparator(final Collection<byte[]> basesToSort) {
final ArrayList<byte[]> sorted = new ArrayList<>(basesToSort);
Collections.sort(sorted, BaseUtils.BASES_COMPARATOR);
for (int i = 0; i < sorted.size(); i++) {
Assert.assertEquals(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(i)),0);
final String iString = new String(sorted.get(i));
for (int j = i; j < sorted.size(); j++) {
final String jString = new String(sorted.get(j));
if (iString.compareTo(jString) == 0)
Assert.assertEquals(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)),0);
else
Assert.assertTrue(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)) * iString.compareTo(jString) > 0);
Assert.assertTrue(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)) <= 0);
}
}
}
@DataProvider(name="baseComparatorData")
public Object[][] baseComparatorData() {
final int testCount = 10;
final int testSizeAverage = 10;
final int testSizeDeviation = 10;
final int haplotypeSizeAverage = 100;
final int haplotypeSizeDeviation = 100;
final Object[][] result = new Object[testCount][];
GenomeAnalysisEngine.resetRandomGenerator();
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
for (int i = 0; i < testCount; i++) {
final int size = (int) Math.max(0,rnd.nextDouble() * testSizeDeviation + testSizeAverage);
final ArrayList<byte[]> bases = new ArrayList<>(size);
for (int j = 0; j < size; j++) {
final int jSize = (int) Math.max(0,rnd.nextDouble() * haplotypeSizeDeviation + haplotypeSizeAverage);
final byte[] b = new byte[jSize];
for (int k = 0; k < jSize; k++)
b[k] = BaseUtils.baseIndexToSimpleBase(rnd.nextInt(4));
bases.add(b);
}
result[i] = new Object[] { bases };
}
return result;
}
}

View File

@ -236,4 +236,23 @@ public class UtilsUnitTest extends BaseTest {
public void testTrim(final String s, final int frontTrim, final int backTrim) {
Assert.assertEquals(s.length() - frontTrim - backTrim, Utils.trimArray(s.getBytes(), frontTrim, backTrim).length);
}
@Test(dataProvider = "equalRange", enabled = true)
public void testEqualRange(final byte[] array1, final byte[] array2, final int offset1, final int offset2, final int length, final boolean expected) {
Assert.assertEquals(Utils.equalRange(array1,offset1,array2,offset2,length),expected);
Assert.assertTrue(Utils.equalRange(array1,offset1,array1,offset1,length));
Assert.assertTrue(Utils.equalRange(array2,offset2,array2,offset2,length));
}
@DataProvider(name = "equalRangeData")
public Object[][] equalRangeData() {
return new Object[][] {
new Object[] { new byte[0] , new byte[0], 0, 0, 0, true},
new Object[] { "ABCF".getBytes(), "BC".getBytes(), 1,0,2, true },
new Object[] { "ABCF".getBytes(), "".getBytes(), 1,0,0, true },
new Object[] { "ABCF".getBytes(), "ACBF".getBytes(), 0,0, 4, false}
};
}
}