Merge pull request #679 from broadinstitute/eb_better_tail_merging_PT74222522

Improved tail merging: now tails can be merged to branches that are not ...
2014-07-10 13:54:33 -04:00 · 2014-07-10 13:54:33 -04:00 · 193e389b41
parent 598b481733 1d97b4a191
commit 193e389b41
9 changed files with 85 additions and 44 deletions
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java
@ -273,6 +273,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
    @Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = false)
    protected boolean dontIncreaseKmerSizesForCycles = false;

+    @Advanced
+    @Argument(fullName="allowNonUniqueKmersInRef", shortName="allowNonUniqueKmersInRef", doc="Should we allow graphs which have non-unique kmers in the reference?", required = false)
+    protected boolean allowNonUniqueKmersInRef = false;
+
    @Advanced
    @Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
    protected int numPruningSamples = 1;
@ -615,7 +619,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
        }

        // create and setup the assembler
-        assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples);
+        assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples);

        assemblyEngine.setErrorCorrectKmers(errorCorrectKmers);
        assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR);
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java
@ -260,15 +260,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
    /**
     * Traverse the graph and get the next reference vertex if it exists
     * @param v the current vertex, can be null
-     * @return  the next reference vertex if it exists
+     * @return  the next reference vertex if it exists, otherwise null
     */
    public V getNextReferenceVertex( final V v ) {
+        return getNextReferenceVertex(v, false, Collections.<MultiSampleEdge>emptyList());
+    }
+
+    /**
+     * Traverse the graph and get the next reference vertex if it exists
+     * @param v the current vertex, can be null
+     * @param allowNonRefPaths if true and no outgoing reference edge exists, follow a non-reference edge provided exactly one outgoing edge remains after removing the blacklisted ones
+     * @param blacklistedEdges edges to ignore in the traversal down (only consulted when allowNonRefPaths is true); useful to exclude the non-reference dangling paths
+     * @return the next vertex (but not necessarily on the reference path if allowNonRefPaths is true) if it exists, otherwise null
+     */
+    public V getNextReferenceVertex( final V v, final boolean allowNonRefPaths, final Collection<MultiSampleEdge> blacklistedEdges ) {
        if( v == null ) { return null; }
-        for( final E edgeToTest : outgoingEdgesOf(v) ) {
+
+        // not final: outgoingEdgesOf returns an immutable set, so this is re-assigned to a mutable copy below when edges must be removed
+        Set<E> edges = outgoingEdgesOf(v);
+
+        for( final E edgeToTest : edges ) {
            if( edgeToTest.isRef() ) {
                return getEdgeTarget(edgeToTest);
            }
        }
+
+        // if we got here, then we aren't on a reference path
+        if ( allowNonRefPaths ) {
+            edges = new HashSet<>(edges);  // edges was immutable
+            edges.removeAll(blacklistedEdges);
+            if ( edges.size() == 1 )
+                return getEdgeTarget(edges.iterator().next());
+        }
+
        return null;
    }

--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java
@ -315,13 +315,13 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
     */
    protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {

-        // find the lowest common ancestor path between vertex and the reference sink if available
-        final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestorOfReference(vertex, pruneFactor);
+        // find the lowest common ancestor path between this vertex and the diverging master path if available
+        final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor);
        if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
            return null;

        // now get the reference path from the LCA
-        final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards);
+        final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Arrays.asList(incomingEdgeOf(altPath.get(1))));

        // create the Smith-Waterman strings to use
        final byte[] refBases = getBasesForPath(refPath, false);
@ -348,7 +348,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
            return null;

        // now get the reference path from the LCA
-        final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards);
+        final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Collections.<MultiSampleEdge>emptyList());

        // create the Smith-Waterman strings to use
        final byte[] refBases = getBasesForPath(refPath, true);
@ -360,19 +360,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
    }

    /**
-     * Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex.
+     * Finds the path upwards in the graph from this vertex to the first diverging node, including that (lowest common ancestor) vertex.
     * Note that nodes are excluded if their pruning weight is less than the pruning factor.
     *
     * @param vertex   the original vertex
     * @param pruneFactor  the prune factor to use in ignoring chain pieces
-     * @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
+     * @return the path if it can be determined or null if this vertex either doesn't merge onto another path or
     *  has an ancestor with multiple incoming edges before hitting that diverging (lowest common ancestor) vertex
     */
-    protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
+    protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestor(final MultiDeBruijnVertex vertex, final int pruneFactor) {
        final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();

        MultiDeBruijnVertex v = vertex;
-        while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) {
+        while ( inDegreeOf(v) == 1 && outDegreeOf(v) < 2 ) {
            final MultiSampleEdge edge = incomingEdgeOf(v);
            // if it has too low a weight, don't use it (or previous vertexes) for the path
            if ( edge.getPruningMultiplicity() < pruneFactor )
@ -384,7 +384,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
        }
        path.addFirst(v);

-        return isReferenceNode(v) ? path : null;
+        return outDegreeOf(v) > 1 ? path : null;
    }

    /**
@ -426,17 +426,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
     *
     * @param start   the vertex to start from; it does not have to be on the reference path
     * @param direction describes which direction to move in the graph (i.e. down to the reference sink or up to the source)
+     * @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths
     * @return the path (non-null, non-empty)
     */
-    protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start, final TraversalDirection direction) {
-        if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path");
+    protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start,
+                                                         final TraversalDirection direction,
+                                                         final Collection<MultiSampleEdge> blacklistedEdges) {

        final List<MultiDeBruijnVertex> path = new ArrayList<>();

        MultiDeBruijnVertex v = start;
        while ( v != null ) {
            path.add(v);
-            v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v) : getPrevReferenceVertex(v));
+            v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v, true, blacklistedEdges) : getPrevReferenceVertex(v));
        }

        return path;
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java
@ -72,6 +72,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
    private final int maxAllowedPathsForReadThreadingAssembler;

    private final boolean dontIncreaseKmerSizesForCycles;
+    private final boolean allowNonUniqueKmersInRef;
    private final int numPruningSamples;
    protected boolean removePathsNotConnectedToRef = true;
    private boolean justReturnRawGraph = false;
@ -81,16 +82,17 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
        this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25));
    }

-    public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final int numPruningSamples) {
+    public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) {
        super(maxAllowedPathsForReadThreadingAssembler);
        this.kmerSizes = kmerSizes;
        this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler;
        this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
+        this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
        this.numPruningSamples = numPruningSamples;
    }

-    public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
-        this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, 1);
+    protected ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes) {
+        this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, true, 1);
    }

    /** for testing purposes */
@ -109,7 +111,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {

        // first, try using the requested kmer sizes
        for ( final int kmerSize : kmerSizes ) {
-            addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles));
+            addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef));
        }

        // if none of those worked, iterate over larger sizes if allowed to do so
@ -118,7 +120,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
            int numIterations = 1;
            while ( results.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) {
                // on the last attempt we will allow low complexity graphs and non-unique kmers in the reference
-                addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT));
+                final boolean lastAttempt = numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT;
+                addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, lastAttempt, lastAttempt));
                kmerSize += KMER_SIZE_ITERATION_INCREASE;
                numIterations++;
            }
@ -135,18 +138,25 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
     * @param kmerSize         kmer size
     * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph
     * @param allowLowComplexityGraphs if true, do not check for low-complexity graphs
+     * @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers
     * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity)
     */
    protected AssemblyResult createGraph(final List<GATKSAMRecord> reads,
                                         final Haplotype refHaplotype,
                                         final int kmerSize,
                                         final List<Haplotype> activeAlleleHaplotypes,
-                                         final boolean allowLowComplexityGraphs) {
+                                         final boolean allowLowComplexityGraphs,
+                                         final boolean allowNonUniqueKmersInRef) {
        if ( refHaplotype.length() < kmerSize ) {
            // happens in cases where the assembled region is just too small
            return new AssemblyResult(AssemblyResult.Status.FAILED, null);
        }

+        if ( !allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty() ) {
+            if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers");
+            return null;
+        }
+
        final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples);

        rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads);
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
@ -435,7 +435,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
     * @param kmerSize the size of the kmers
     * @return a non-null collection of non-unique kmers in sequence
     */
-    private Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
+    static protected Collection<Kmer> determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) {
        // count up occurrences of kmers within the given sequence (a read or the reference)
        final KMerCounter counter = new KMerCounter(kmerSize);
        final int stopPosition = seqForKmers.stop - kmerSize;
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java
@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
    @Test
    public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
-                "b63bf0f2848f28e52fd5c7621bbd27c7");
+                "3c02e454fe6d0e296effd99fbd3cba4c");
    }

    private void HCTestComplexConsensusMode(String bam, String args, String md5) {
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
    @Test
    public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
        HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
-                "972a726b0ba476f6215bf162439db2cd");
+                "f0560ba2ca20e5202181d0e57b1ee8cf");
    }

 }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java
@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
        final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

        // this functionality can be adapted to provide input data for whatever you might want in your data
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "662481d96a58c5475dc4752466a8d3b2"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d9f7fd119eec40b4610cb4aae1cbed75"});
-        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "cbf6d876045051a68aca784491cca6cf"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "f321fadf3d6b4608536fba1015e9693a"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8d5626262b255ce6d4a53394ea8cd30f"});
+        tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "126c94d06e00c67380051c4924054841"});
        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
        tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e6ec90da053a612f0c615e221eb34baa"});
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testWrongGVCFNonVariantRecordOrderBugFix() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
                b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5fbc88cf1136212afac1fd0b7e0e8ce8"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fecd24420579f321157e5605b6cd7f2"));
        spec.disableShadowBCF();
        executeTest("testMissingGVCFIndexingStrategyException", spec);
    }
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
    public void testNoCallGVCFMissingPLsBugFix() {
        final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
                b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1b77bc92962fa10b5eec86fe9400c528"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("95bae0b4a8fe9bfcf71762704b22a76e"));
        spec.disableShadowBCF();
        executeTest("testNoCallGVCFMissingPLsBugFix", spec);
    }
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerMultiSample() {
-        HCTest(CEUTRIO_BAM, "", "b66f8d08e0b94ddc52bd06dadbb9b299");
+        HCTest(CEUTRIO_BAM, "", "60e2f0c3ce33a05c060035d86bc79543");
    }

    @Test
@ -94,17 +94,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerMinBaseQuality() {
-        HCTest(NA12878_BAM, "-mbq 15", "f753c9e284eafb1424b7f9d88193fdee");
+        HCTest(NA12878_BAM, "-mbq 15", "d063c0e5af1fd413be0500609ae36d46");
    }

    @Test
    public void testHaplotypeCallerGraphBasedSingleSample() {
-        HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9");
+        HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "07910f50710349eacd2560452fac3e8d");
    }

    @Test
    public void testHaplotypeCallerGraphBasedMultiSample() {
-        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f200260127d0f63cc61ce5f2287bd5a0");
+        HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "c5ef449a46b80b69dde87aa52041fe50");
    }

    @Test(enabled = false) // can't annotate the rsID's yet
@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    @Test
    public void testHaplotypeCallerMultiSampleGGA() {
        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
-                "4b1271aaa70f46ed6c987cdf4610b7f0");
+                "b61e0bdf0e3180cb4f5abd3491b05aa6");
    }

    @Test
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void testHaplotypeCallerSingleSampleIndelQualityScores() {
-        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "575850d9a8195000a2ef747f838aaf77");
+        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec90508248b82800eb2596348a7cf85b");
    }

    private void HCTestNearbySmallIntervals(String bam, String args, String md5) {
@ -199,7 +199,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {

    @Test
    public void HCTestDanglingTailMergingForDeletions() throws IOException {
-        final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800";
+        final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef";
        final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
        final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0);

@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestDBSNPAnnotationWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
-                Arrays.asList("cf314806fcb0847e2ed99c7013e5d55d"));
+                Arrays.asList("10a7a3d49cb6cb172ae8404996c524df"));
        executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
    }

@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestDBSNPAnnotationWGSGraphBased() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
-                Arrays.asList("0e68c82163d4fee30b4135d4ebd1a3ba"));
+                Arrays.asList("14384fe06359bf35e11d3802217e1a23"));
        executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
    }

@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestAggressivePcrIndelModelWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
-                Arrays.asList("be7bf9e8f78d6a9ee569b750054991ac"));
+                Arrays.asList("daf2a533d83f1a5fd7d0e5a92d67fd64"));
        executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
    }

@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void HCTestConservativePcrIndelModelWGS() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
-                Arrays.asList("01b716d421dc33f7a6bc8c387b7e3322"));
+                Arrays.asList("69bccf72c900361775d076d7d131955d"));
        executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
    }

@ -313,7 +313,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
    public void testMissingKeyAlternativeHaplotypesBugFix() {
        final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
                b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list");
-        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cd33c69a34a66b5baca72841fe8a2bc0"));
+        final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4322083ad7decca26a3257be2f1d0347"));
        spec.disableShadowBCF();
        executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec);
    }
@ -336,9 +336,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
        // but please make sure that both outputs get the same variant,
        // alleles all with DBSNP ids
        // We test here that change in active region size does not have an effect in placement of indels.
-        final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("0f87fd95350fade475fe3c5df8dd9e08"));
+        final String md5 = "54c2cea6e2266fe3ff05c1b5d58bb609";
+        final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList(md5));
        executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec);
-        final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("b3a2853d51a7817999a3be8e72d9ac23"));
+        final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList(md5));
        executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
    }

--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java
@ -60,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
        List<Object[]> tests = new ArrayList<>();

        for ( final int nct : Arrays.asList(1, 2, 4) ) {
-            tests.add(new Object[]{nct, "a2718251ffae9db885b7f74b33dd5b57"});
+            tests.add(new Object[]{nct, "af29c396835b86d88f366629884e6ba4"});
        }

        return tests.toArray(new Object[][]{});