From 1d97b4a191776b13a1e1506596ee869183828e81 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 2 Jul 2014 14:59:18 -0400 Subject: [PATCH] Improved tail merging: tails can now be merged to branches that are not entirely reference. This is useful, e.g., for cases where there are SNPs on insertions. Previously, tails were forced to be merged (incorrectly) only to a reference node; now they can be merged to any path in the graph from which they directly branch. Also, I've transferred over Ryan's code to refuse to process kmer sizes for which the reference sequence contains non-unique kmers. --- .../haplotypecaller/HaplotypeCaller.java | 6 +++- .../haplotypecaller/graphs/BaseGraph.java | 28 ++++++++++++++++-- .../DanglingChainMergingGraph.java | 26 +++++++++-------- .../readthreading/ReadThreadingAssembler.java | 22 ++++++++++---- .../readthreading/ReadThreadingGraph.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +-- .../HaplotypeCallerGVCFIntegrationTest.java | 10 +++---- .../HaplotypeCallerIntegrationTest.java | 29 ++++++++++--------- ...aplotypeCallerParallelIntegrationTest.java | 2 +- 9 files changed, 85 insertions(+), 44 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 994ba209d..000671fd3 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -273,6 +273,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="dontIncreaseKmerSizesForCycles", shortName="dontIncreaseKmerSizesForCycles", doc="Should we disable the iterating over kmer sizes when graph cycles are detected?", required = 
false) protected boolean dontIncreaseKmerSizesForCycles = false; + @Advanced + @Argument(fullName="allowNonUniqueKmersInRef", shortName="allowNonUniqueKmersInRef", doc="Should we allow graphs which have non-unique kmers in the reference?", required = false) + protected boolean allowNonUniqueKmersInRef = false; + @Advanced @Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false) protected int numPruningSamples = 1; @@ -615,7 +619,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // create and setup the assembler - assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); + assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples); assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java index c31ef8469..b69e6d997 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/graphs/BaseGraph.java @@ -260,15 +260,39 @@ public class BaseGraph extends Default /** * Traverse the graph and get the next reference vertex if it exists * @param v the current vertex, can be null - * @return the next reference vertex if it exists + * @return the next reference vertex if it exists, otherwise null */ public V getNextReferenceVertex( final V v ) { + return getNextReferenceVertex(v, 
false, Collections.emptyList()); + } + + /** + * Traverse the graph and get the next reference vertex if it exists + * @param v the current vertex, can be null + * @param allowNonRefPaths if true, allow sub-paths that are non-reference if there is only a single outgoing edge + * @param blacklistedEdges edges to ignore in the traversal down; useful to exclude the non-reference dangling paths + * @return the next vertex (but not necessarily on the reference path if allowNonRefPaths is true) if it exists, otherwise null + */ + public V getNextReferenceVertex( final V v, final boolean allowNonRefPaths, final Collection blacklistedEdges ) { if( v == null ) { return null; } - for( final E edgeToTest : outgoingEdgesOf(v) ) { + + // variable must be mutable because outgoingEdgesOf is an immutable collection + Set edges = outgoingEdgesOf(v); + + for( final E edgeToTest : edges ) { if( edgeToTest.isRef() ) { return getEdgeTarget(edgeToTest); } } + + // if we got here, then we aren't on a reference path + if ( allowNonRefPaths ) { + edges = new HashSet<>(edges); // edges was immutable + edges.removeAll(blacklistedEdges); + if ( edges.size() == 1 ) + return getEdgeTarget(edges.iterator().next()); + } + return null; } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java index 09a5a001f..aa8bee8b3 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java @@ -315,13 +315,13 @@ public abstract class DanglingChainMergingGraph extends BaseGraph altPath = 
findPathUpwardsToLowestCommonAncestorOfReference(vertex, pruneFactor); + // find the lowest common ancestor path between this vertex and the diverging master path if available + final List altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor); if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH ) return null; // now get the reference path from the LCA - final List refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards); + final List refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Arrays.asList(incomingEdgeOf(altPath.get(1)))); // create the Smith-Waterman strings to use final byte[] refBases = getBasesForPath(refPath, false); @@ -348,7 +348,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards); + final List refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Collections.emptyList()); // create the Smith-Waterman strings to use final byte[] refBases = getBasesForPath(refPath, true); @@ -360,19 +360,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph findPathUpwardsToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) { + protected List findPathUpwardsToLowestCommonAncestor(final MultiDeBruijnVertex vertex, final int pruneFactor) { final LinkedList path = new LinkedList<>(); MultiDeBruijnVertex v = vertex; - while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) { + while ( inDegreeOf(v) == 1 && outDegreeOf(v) < 2 ) { final MultiSampleEdge edge = incomingEdgeOf(v); // if it has too low a weight, don't use it (or previous vertexes) for the path if ( edge.getPruningMultiplicity() < pruneFactor ) @@ -384,7 +384,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph 1 ? 
path : null; } /** @@ -426,17 +426,19 @@ public abstract class DanglingChainMergingGraph extends BaseGraph getReferencePath(final MultiDeBruijnVertex start, final TraversalDirection direction) { - if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path"); + protected List getReferencePath(final MultiDeBruijnVertex start, + final TraversalDirection direction, + final Collection blacklistedEdges) { final List path = new ArrayList<>(); MultiDeBruijnVertex v = start; while ( v != null ) { path.add(v); - v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v) : getPrevReferenceVertex(v)); + v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v, true, blacklistedEdges) : getPrevReferenceVertex(v)); } return path; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 77cc81b48..8b9f9b67c 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -72,6 +72,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { private final int maxAllowedPathsForReadThreadingAssembler; private final boolean dontIncreaseKmerSizesForCycles; + private final boolean allowNonUniqueKmersInRef; private final int numPruningSamples; protected boolean removePathsNotConnectedToRef = true; private boolean justReturnRawGraph = false; @@ -81,16 +82,17 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { this(DEFAULT_NUM_PATHS_PER_GRAPH, Arrays.asList(25)); } - 
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final int numPruningSamples) { + public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) { super(maxAllowedPathsForReadThreadingAssembler); this.kmerSizes = kmerSizes; this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler; this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles; + this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef; this.numPruningSamples = numPruningSamples; } - public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { - this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, 1); + protected ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes) { + this(maxAllowedPathsForReadThreadingAssembler, kmerSizes, true, true, 1); } /** for testing purposes */ @@ -109,7 +111,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // first, try using the requested kmer sizes for ( final int kmerSize : kmerSizes ) { - addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles)); + addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef)); } // if none of those worked, iterate over larger sizes if allowed to do so @@ -118,7 +120,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { int numIterations = 1; while ( results.isEmpty() && numIterations <= MAX_KMER_ITERATIONS_TO_ATTEMPT ) { // on the last attempt we will allow low complexity graphs - addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, numIterations == 
MAX_KMER_ITERATIONS_TO_ATTEMPT)); + final boolean lastAttempt = numIterations == MAX_KMER_ITERATIONS_TO_ATTEMPT; + addResult(results, createGraph(reads, refHaplotype, kmerSize, givenHaplotypes, lastAttempt, lastAttempt)); kmerSize += KMER_SIZE_ITERATION_INCREASE; numIterations++; } @@ -135,18 +138,25 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { * @param kmerSize kmer size * @param activeAlleleHaplotypes the GGA haplotypes to inject into the graph * @param allowLowComplexityGraphs if true, do not check for low-complexity graphs + * @param allowNonUniqueKmersInRef if true, do not fail if the reference has non-unique kmers * @return sequence graph or null if one could not be created (e.g. because it contains cycles or too many paths or is low complexity) */ protected AssemblyResult createGraph(final List reads, final Haplotype refHaplotype, final int kmerSize, final List activeAlleleHaplotypes, - final boolean allowLowComplexityGraphs) { + final boolean allowLowComplexityGraphs, + final boolean allowNonUniqueKmersInRef) { if ( refHaplotype.length() < kmerSize ) { // happens in cases where the assembled region is just too small return new AssemblyResult(AssemblyResult.Status.FAILED, null); } + if ( !allowNonUniqueKmersInRef && !ReadThreadingGraph.determineNonUniqueKmers(new SequenceForKmers("ref", refHaplotype.getBases(), 0, refHaplotype.getBases().length, 1, true), kmerSize).isEmpty() ) { + if ( debug ) logger.info("Not using kmer size of " + kmerSize + " in read threading assembler because reference contains non-unique kmers"); + return null; + } + final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples); rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java 
b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index d34b66873..f98b39fcb 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -435,7 +435,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme * @param kmerSize the size of the kmers * @return a non-null collection of non-unique kmers in sequence */ - private Collection determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) { + static protected Collection determineNonUniqueKmers(final SequenceForKmers seqForKmers, final int kmerSize) { // count up occurrences of kmers within each read final KMerCounter counter = new KMerCounter(kmerSize); final int stopPosition = seqForKmers.stop - kmerSize; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 14913d46a..d820ea259 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - 
"b63bf0f2848f28e52fd5c7621bbd27c7"); + "3c02e454fe6d0e296effd99fbd3cba4c"); } private void HCTestComplexConsensusMode(String bam, String args, String md5) { @@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "972a726b0ba476f6215bf162439db2cd"); + "f0560ba2ca20e5202181d0e57b1ee8cf"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 7a5b1e1af..77fbed95b 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -67,9 +67,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "662481d96a58c5475dc4752466a8d3b2"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "d9f7fd119eec40b4610cb4aae1cbed75"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "cbf6d876045051a68aca784491cca6cf"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "f321fadf3d6b4608536fba1015e9693a"}); + tests.add(new 
Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8d5626262b255ce6d4a53394ea8cd30f"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "126c94d06e00c67380051c4924054841"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e6ec90da053a612f0c615e221eb34baa"}); @@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testWrongGVCFNonVariantRecordOrderBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5fbc88cf1136212afac1fd0b7e0e8ce8")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fecd24420579f321157e5605b6cd7f2")); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } @@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testNoCallGVCFMissingPLsBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new 
WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1b77bc92962fa10b5eec86fe9400c528")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("95bae0b4a8fe9bfcf71762704b22a76e")); spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 25b93da1c..9993764a9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "b66f8d08e0b94ddc52bd06dadbb9b299"); + HCTest(CEUTRIO_BAM, "", "60e2f0c3ce33a05c060035d86bc79543"); } @Test @@ -94,17 +94,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMinBaseQuality() { - HCTest(NA12878_BAM, "-mbq 15", "f753c9e284eafb1424b7f9d88193fdee"); + HCTest(NA12878_BAM, "-mbq 15", "d063c0e5af1fd413be0500609ae36d46"); } @Test public void testHaplotypeCallerGraphBasedSingleSample() { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "5a49b5b98247070e8de637a706b02db9"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "07910f50710349eacd2560452fac3e8d"); } @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "f200260127d0f63cc61ce5f2287bd5a0"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "c5ef449a46b80b69dde87aa52041fe50"); } @Test(enabled = false) // can't annotate the rsID's 
yet @@ -115,7 +115,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "4b1271aaa70f46ed6c987cdf4610b7f0"); + "b61e0bdf0e3180cb4f5abd3491b05aa6"); } @Test @@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "575850d9a8195000a2ef747f838aaf77"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "ec90508248b82800eb2596348a7cf85b"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -199,7 +199,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestDanglingTailMergingForDeletions() throws IOException { - final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800"; + final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef"; final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("")); final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0); @@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - 
Arrays.asList("cf314806fcb0847e2ed99c7013e5d55d")); + Arrays.asList("10a7a3d49cb6cb172ae8404996c524df")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("0e68c82163d4fee30b4135d4ebd1a3ba")); + Arrays.asList("14384fe06359bf35e11d3802217e1a23")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestAggressivePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("be7bf9e8f78d6a9ee569b750054991ac")); + Arrays.asList("daf2a533d83f1a5fd7d0e5a92d67fd64")); executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec); } @@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("01b716d421dc33f7a6bc8c387b7e3322")); + Arrays.asList("69bccf72c900361775d076d7d131955d")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } @@ -313,7 +313,7 @@ public class 
HaplotypeCallerIntegrationTest extends WalkerTest { public void testMissingKeyAlternativeHaplotypesBugFix() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list"); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cd33c69a34a66b5baca72841fe8a2bc0")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4322083ad7decca26a3257be2f1d0347")); spec.disableShadowBCF(); executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec); } @@ -336,9 +336,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { // but please make sure that both outputs get the same variant, // alleles all with DBSNP ids // We test here that change in active region size does not have an effect in placement of indels. - final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList("0f87fd95350fade475fe3c5df8dd9e08")); + final String md5 = "54c2cea6e2266fe3ff05c1b5d58bb609"; + final WalkerTestSpec shortSpec = new WalkerTestSpec(commandLineShortInterval + " -o %s",Arrays.asList(md5)); executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::shortInterval",shortSpec); - final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList("b3a2853d51a7817999a3be8e72d9ac23")); + final WalkerTestSpec longSpec = new WalkerTestSpec(commandLineLongInterval + " -o %s",Arrays.asList(md5)); executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index 1cc0df52b..320ba1e09 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -60,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList<>(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "a2718251ffae9db885b7f74b33dd5b57"}); + tests.add(new Object[]{nct, "af29c396835b86d88f366629884e6ba4"}); } return tests.toArray(new Object[][]{});