From cc175bad40e8bd2faad4362fdc8e1294082468b9 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 2 Sep 2014 12:42:13 -0400 Subject: [PATCH] Improve the accuracy of dangling head merging in the HC assembler. Dangling head merging (like with tails) in now enabled by default. The --recoverDanglingHeads argument is now deprecated so that users know not to use it anymore. We now also allow the user to set the minimum branch length for merging. This will be different for exomes and RNA (see below). The other changes in the code itself: 1. We no longer allow an arbitrarily large number of mismatches in the dangling head for merging 2. The max number of mismatches allowed in a dangling head is proportional to the kmer size There will be a difference in the RNA calling pipeline. Instead of invoking '--recoverDanglingHeads' the user will instead want to use '--minDanglingBranchLength 0'. Below are the knowledgebase results of the master branch vs. this one. For NA12878 DNA Exome: master SNPS TRUE_POSITIVE 36722 master SNPS CALLED_NOT_IN_DB_AT_ALL 2699 master SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 292 master SNPS FALSE_POSITIVE_SITE_IS_FP 70 branch SNPS TRUE_POSITIVE 36867 branch SNPS CALLED_NOT_IN_DB_AT_ALL 2952 branch SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 387 branch SNPS FALSE_POSITIVE_SITE_IS_FP 94 As I discussed with Ryan in person, there are a good number of FPs that are called in the new code, but they nearly all have bad strand bias and should be easily filtered by VQSR. Note that there is no change for indels. For NA12878 RNA from Ami: master SNPS TRUE_POSITIVE 11055 master SNPS CALLED_NOT_IN_DB_AT_ALL 831 master SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 44 master SNPS FALSE_POSITIVE_SITE_IS_FP 96 branch SNPS TRUE_POSITIVE 11113 branch SNPS CALLED_NOT_IN_DB_AT_ALL 874 branch SNPS REASONABLE_FILTERS_WOULD_FILTER_FP_SITE 47 branch SNPS FALSE_POSITIVE_SITE_IS_FP 92 Again, there's basically no change for indels. --- .../haplotypecaller/HaplotypeCaller.java | 28 ++++++---- .../haplotypecaller/LocalAssemblyEngine.java | 22 +++----- .../DanglingChainMergingGraph.java | 56 +++++++++++++------ .../readthreading/ReadThreadingAssembler.java | 10 ++-- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +- .../HaplotypeCallerIntegrationTest.java | 34 +++++------ .../DanglingChainMergingGraphUnitTest.java | 5 +- .../ReadThreadingAssemblerUnitTest.java | 4 +- 8 files changed, 92 insertions(+), 73 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index 81363461e..86b790307 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -364,16 +364,22 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false) protected int numPruningSamples = 1; - /** - * This mode is currently experimental and should only be used in the RNA-seq calling pipeline. - */ - @Advanced - @Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="Should we enable dangling head recovery in the read threading assembler?", required = false) - protected boolean recoverDanglingHeads = false; + @Deprecated + @Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="This argument is no longer needed as it is now the default behavior", required = false) + protected boolean DEPRECATED_RecoverDanglingHeads = false; @Hidden - @Argument(fullName="doNotRecoverDanglingTails", shortName="doNotRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false) - protected boolean doNotRecoverDanglingTails = false; + @Argument(fullName="doNotRecoverDanglingBranches", shortName="doNotRecoverDanglingBranches", doc="Should we disable dangling head and tail recovery in the read threading assembler?", required = false) + protected boolean doNotRecoverDanglingBranches = false; + + /** + * When constructing the assembly graph we are often left with "dangling" branches. The assembly engine attempts to rescue these branches + * by merging them back into the main graph. This argument describes the minimum length of a dangling branch needed for the engine to + * try to rescue it. A smaller number here will lead to higher sensitivity to real variation but also to a higher number of false positives. + */ + @Advanced + @Argument(fullName="minDanglingBranchLength", shortName="minDanglingBranchLength", doc="Minimum length of a dangling branch to attempt recovery in the read threading assembler", required = false) + protected int minDanglingBranchLength = 4; @Advanced @Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false) @@ -678,7 +684,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } - + // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class); simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY; @@ -750,8 +756,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In assemblyEngine.setDebug(SCAC.DEBUG); assemblyEngine.setDebugGraphTransformations(debugGraphTransformations); assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths); - assemblyEngine.setRecoverDanglingTails(!doNotRecoverDanglingTails); - assemblyEngine.setRecoverDanglingHeads(recoverDanglingHeads); + assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches); + assemblyEngine.setMinDanglingBranchLength(minDanglingBranchLength); assemblyEngine.setMinBaseQualityToUseInAssembly(MIN_BASE_QUALTY_SCORE); MIN_TAIL_QUALITY = (byte)(MIN_BASE_QUALTY_SCORE - 1); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LocalAssemblyEngine.java index d9bdc6d44..06de21333 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -86,8 +86,8 @@ public abstract class LocalAssemblyEngine { protected boolean debug = false; protected boolean allowCyclesInKmerGraphToGeneratePaths = false; protected boolean debugGraphTransformations = false; - protected boolean recoverDanglingTails = true; - protected boolean recoverDanglingHeads = true; + protected boolean recoverDanglingBranches = true; + protected int minDanglingBranchLength = 0; protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE; protected int pruneFactor = 2; @@ -439,19 +439,11 @@ public abstract class LocalAssemblyEngine { this.debugGraphTransformations = debugGraphTransformations; } - public boolean isRecoverDanglingTails() { - return recoverDanglingTails; + public boolean isRecoverDanglingBranches() { return recoverDanglingBranches; } + + public void setRecoverDanglingBranches(final boolean recoverDanglingBranches) { + this.recoverDanglingBranches = recoverDanglingBranches; } - public void setRecoverDanglingTails(boolean recoverDanglingTails) { - this.recoverDanglingTails = recoverDanglingTails; - } - - public boolean isRecoverDanglingHeads() { - return recoverDanglingHeads; - } - - public void setRecoverDanglingHeads(boolean recoverDanglingHeads) { - this.recoverDanglingHeads = recoverDanglingHeads; - } + public void setMinDanglingBranchLength(final int minDanglingBranchLength) { this.minDanglingBranchLength = minDanglingBranchLength; } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java index 68615492d..33985cf6c 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraph.java @@ -60,8 +60,7 @@ import java.util.*; public abstract class DanglingChainMergingGraph extends BaseGraph { private static final int MAX_CIGAR_COMPLEXITY = 3; - private static final int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA - private static final int MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE = 1; + private int maxMismatchesInDanglingHead = -1; protected boolean alreadyBuilt; @@ -73,6 +72,10 @@ public abstract class DanglingChainMergingGraph extends BaseGraph 0"); // generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths - final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor); + final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength); // if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar, false, true) ) @@ -189,13 +195,14 @@ public abstract class DanglingChainMergingGraph extends BaseGraph 0"); // generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths - final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor); + final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor, minDanglingBranchLength); // if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path if ( danglingHeadMergeResult == null || ! cigarIsOkayToMerge(danglingHeadMergeResult.cigar, true, false) ) @@ -230,7 +237,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor); - if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH ) + if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1 ) // add 1 to include the LCA return null; // now get the reference path from the LCA @@ -340,11 +348,11 @@ public abstract class DanglingChainMergingGraph extends BaseGraph altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor); - if ( altPath == null || isRefSink(altPath.get(0)) ) + if ( altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1 ) // add 1 to include the LCA return null; // now get the reference path from the LCA @@ -477,14 +485,15 @@ public abstract class DanglingChainMergingGraph extends BaseGraph MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE ) - return lastGoodIndex; + if ( ++mismatches > maxMismatches ) + return -1; lastGoodIndex = index; } index++; @@ -493,6 +502,17 @@ public abstract class DanglingChainMergingGraph extends BaseGraph 0 ? maxMismatchesInDanglingHead : Math.max(1, (lengthOfDanglingBranch / kmerSize)); + } + protected boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) { final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 8b9f9b67c..f9d4a7f5f 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -69,7 +69,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { /** The min and max kmer sizes to try when building the graph. */ private final List kmerSizes; - private final int maxAllowedPathsForReadThreadingAssembler; private final boolean dontIncreaseKmerSizesForCycles; private final boolean allowNonUniqueKmersInRef; @@ -85,7 +84,6 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List kmerSizes, final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef, final int numPruningSamples) { super(maxAllowedPathsForReadThreadingAssembler); this.kmerSizes = kmerSizes; - this.maxAllowedPathsForReadThreadingAssembler = maxAllowedPathsForReadThreadingAssembler; this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles; this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef; this.numPruningSamples = numPruningSamples; @@ -159,7 +157,7 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples); - rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads); + rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingBranches); // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), true); @@ -199,8 +197,10 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if // we can recover them by merging some N bases from the chain back into the reference - if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor); - if ( recoverDanglingHeads ) rtgraph.recoverDanglingHeads(pruneFactor); + if ( recoverDanglingBranches ) { + rtgraph.recoverDanglingTails(pruneFactor, minDanglingBranchLength); + rtgraph.recoverDanglingHeads(pruneFactor, minDanglingBranchLength); + } // remove all heading and trailing paths if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef(); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index c480e5b21..a76d0876c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "50f00994d0e9ae043c163425a2073675"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "29f216779f0db9a08f4907ea82f0c7fb"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "e497d592078f999d37af4badef0f3c32"); + "9de64c4405e0dab99c70c2fae54d4841"); } @Test @@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "c44bd3a3bf241fc106496510450746fd"); + "272e096b7dc2839d11343f35e5d5442d"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index ab81fccd9..c0b9140b2 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -85,28 +85,28 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "fd31a5da51e46b021db055177841c8c2"); + HCTest(CEUTRIO_BAM, "", "7bc718c6a604405e6d33c3073630cc66"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "4f80b7c7e448d719e308b7d73751bebc"); + HCTest(NA12878_BAM, "", "07125724eb465a739df9c6ab191216b0"); } @Test public void testHaplotypeCallerMultiSampleHaploid() { HCTest(CEUTRIO_BAM, - "-ploidy 1", "466d423a3bacd0032549bda987fb1574"); + "-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576"); } @Test public void testHaplotypeCallerSingleSampleHaploid() { - HCTest(NA12878_BAM, "-ploidy 1", "f588569b6d14246b7e6678ac80286012"); + HCTest(NA12878_BAM, "-ploidy 1", "e5c8a8bfb4d9d522e610a2299a9b32ad"); } @Test public void testHaplotypeCallerSingleSampleTetraploid() { - HCTest(NA12878_BAM, "-ploidy 4", "95d4cca48fcbb82d5e41cfe60d68531e"); + HCTest(NA12878_BAM, "-ploidy 4", "af711f92b33d3f42e87a719745d93a68"); } @Test @@ -126,41 +126,41 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedSingleSample() { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "4577a7c82300950eff0fae10aa4cc4ae"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "79b0f8fa5e42ef23f3d166d84d92fa23"); } @Test public void testHaplotypeCallerGraphBasedMultiSampleHaploid() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "2ce7ef54258261c9e869fc69db3312e3"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da"); } @Test public void testHaplotypeCallerGraphBasedMultiSample() { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "958511863caca47833b1df03a2f1e130"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "10119fbb494e966a6f1b54307cfbeb8b"); } @Test public void testHaplotypeCallerSingleSampleWithDbsnp() { - HCTest(NA12878_BAM, "-D " + b37dbSNP132, "8294e5a78f2e1091369ae5d9735533cd"); + HCTest(NA12878_BAM, "-D " + b37dbSNP132, "fded195a0436242673718e6dc083c172"); } @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" + " -isr INTERSECTION -L " + GGA_INTERVALS_FILE, - "368f42869cc651c8fd94ec2a572bb545"); + "5bc8892b68e281a3ceb4f2d141f8c723"); } @Test public void testHaplotypeCallerMultiSampleGGAHaploid() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "52494fae89dd37abd0029013557dfa9d"); + "d1e872b1d9c484e94f826255e5dde548"); } @Test public void testHaplotypeCallerMultiSampleGGATetraploid() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "6a70e9d87459f274bc9b20f9b97ea274"); + "874f5fc9d7eed8894f72b8d2587de4b6"); } @Test @@ -176,7 +176,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "77f56e9211b13690ad4c30f40469381a"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f2242d1696ef542196d363cf56159851"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -223,7 +223,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2be5ac87d698d7f92525a9721e66ed94")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cf6fbb3636c52cd47dd14e0bd415a320")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @@ -272,7 +272,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("defb00b7f19a4626a1ace1bbf4fdf81d")); + Arrays.asList("86fb942473b3f8df2f8865209e551200")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -289,7 +289,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,090,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("6d221ed702f7f241e1af6f9016754a50")); + Arrays.asList("b6dab8a6223afeb9d0fa7c178c84c024")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -359,7 +359,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testMissingKeyAlternativeHaplotypesBugFix() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list"); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ca3d7b611b57835724fabd78f202bede")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("423b70c151a5d0028e104aa3372b8783")); spec.disableShadowBCF(); executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraphUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraphUnitTest.java index 7078753be..b9e3d8620 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraphUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/DanglingChainMergingGraphUnitTest.java @@ -119,7 +119,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest { Assert.assertTrue(altSink != null, "We did not find a non-reference sink"); // confirm that the SW alignment agrees with our expectations - final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0); + final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0, 4); if ( result == null ) { Assert.assertFalse(cigarIsGood); @@ -209,6 +209,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest { rtgraph.addSequence("ref", ref.getBytes(), true); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M"); rtgraph.addRead(read); + rtgraph.setMaxMismatchesInDanglingHead(10); rtgraph.buildGraphIfNecessary(); // confirm that we have just a single dangling head @@ -223,7 +224,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest { Assert.assertTrue(altSource != null, "We did not find a non-reference source"); // confirm that the SW alignment agrees with our expectations - final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0); + final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0, 1); if ( result == null ) { Assert.assertFalse(shouldBeMerged); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java index 4e90c71cf..430d47069 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssemblerUnitTest.java @@ -85,7 +85,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest { public SeqGraph assemble() { assembler.removePathsNotConnectedToRef = false; // needed to pass some of the tests - assembler.setRecoverDanglingTails(false); // needed to pass some of the tests + assembler.setRecoverDanglingBranches(false); // needed to pass some of the tests assembler.setDebugGraphTransformations(true); final SeqGraph graph = assembler.assemble(reads, refHaplotype, Collections.emptyList()).get(0).getGraph(); if ( DEBUG ) graph.printGraph(new File("test.dot"), 0); @@ -171,7 +171,7 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest { Assert.assertNotNull(graph.getReferenceSinkVertex()); final List paths = new KBestHaplotypeFinder(graph); - Assert.assertEquals(paths.size(), 2); + Assert.assertEquals(paths.size(), 1); } @Test(enabled = ! DEBUG)