From 258b2bce289cbe77cc7fc8c5ca3ff18a9c29e6c7 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Sat, 29 Mar 2014 17:27:35 -0400 Subject: [PATCH] Fix loss of key alternative haplotypes due to a change on threading start policy required when recovering dangling heads. Story: - https://www.pivotaltracker.com/story/show/67601310 Change: - Unless recover-danging-heads is active, the threading starting location policy is the original one. i.e. just at already existing unique kmer vertices. Tests: - HaplotypeCallerIntegrationTest#testMissingKeyAlternativeHaplotypesBugFix --- .../readthreading/ReadThreadingAssembler.java | 2 + .../readthreading/ReadThreadingGraph.java | 39 ++++++++++++++++++- .../VariantAnnotatorIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +-- .../HaplotypeCallerIntegrationTest.java | 23 ++++++++--- 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 30b677fe9..7c5971e03 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -149,6 +149,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize, debugGraphTransformations, minBaseQualityToUseInAssembly, numPruningSamples); + rtgraph.setThreadingStartOnlyAtExistingVertex(!recoverDanglingHeads); + // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), true); diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index e03e26e0a..06ee15a47 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -66,6 +66,8 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme private final static boolean WRITE_GRAPH = false; private final static boolean DEBUG_NON_UNIQUE_CALC = false; + private boolean startThreadingOnlyAtExistingVertex = false; + /** for debugging info printing */ private static int counter = 0; @@ -252,13 +254,48 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) { final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize); - if ( !nonUniqueKmers.contains(kmer1) ) + if ( isThreadingStart(kmer1) ) return i; } return -1; } + /** + * Checks whether a kmer can be the threading start based on the current threading start location policy. + * + * @see #setThreadingStartOnlyAtExistingVertex(boolean) + * @see #getThreadingStartOnlyAtExistingVertex() + * + * @param kmer the query kmer. + * @return {@code true} if we can start thread the sequence at this kmer, {@code false} otherwise. + */ + protected boolean isThreadingStart(final Kmer kmer) { + if (kmer == null) + throw new IllegalArgumentException(); + return startThreadingOnlyAtExistingVertex ? uniqueKmers.containsKey(kmer) : !nonUniqueKmers.contains(kmer); + } + + /** + * Changes the threading start location policy. + * + * @param value {@code true} if threading will start only at existing vertices in the graph, {@code false} if + * it can start at any unique kmer. + */ + public void setThreadingStartOnlyAtExistingVertex(final boolean value) { + startThreadingOnlyAtExistingVertex = value; + } + + /** + * Indicates the threading start location policy. + * + * @return {@code true} if threading will start only at existing vertices in the graph, {@code false} if + * it can start at any unique kmer. + */ + public boolean getThreadingStartOnlyAtExistingVertex() { + return startThreadingOnlyAtExistingVertex; + } + /** * Build the read threaded assembly graph if it hasn't already been constructed from the sequences that have * been added to the graph. diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 0ed7eb2e8..287cd45d0 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0); final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth"; - final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("388200d107fb47326df78a971a52698f")); + final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011")); specAnn.disableShadowBCF(); final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 24e47879d..ec4291e1d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7278afd47e5851c954359441cac2f0b8"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "cbdd34c454d69b266e3681ddfc33c7a3"); + "724a05b7df716647014f29c0fe86e071"); } @Test @@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "61972c7c0d378e756f3b4d99aed9d0cf"); + "9689b89bea8282137fade0905b5f2716"); } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d4186f610..7271b2642 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -89,7 +89,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "c208ef58d464465c68b5c26501122ad7"); + HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465"); } @Test @@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedSingleSample() { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "049ba1794a1ce2b15566bb1e9431fccf"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999"); } @Test @@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2")); + Arrays.asList("2b240d51aa9b0d1f65f4e899a2feb97e")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("eda8f91091fe462205d687ec49fc61e7")); + Arrays.asList("6f6213bfc62e9cd9db56a7277ebe04e0")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b")); + Arrays.asList("7592274ecd2b5ef4624dd2ed659536ef")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } @@ -309,13 +309,24 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); } + @Test(priority= -3) + public void testMissingKeyAlternativeHaplotypesBugFix() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", + b37KGReferenceWithDecoy, privateTestDir + "lost-alt-key-hap.bam", privateTestDir + "lost-alt-key-hap.interval_list"); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e33053579a21d4cb23ba84107595891f")); + spec.disableShadowBCF(); + executeTest("testMissingKeyAlternativeHaplotypesBugFix", spec); + } + @Test public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() { final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list", HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("0f6384c8e170840ef1490d262ac2e06e")); spec.disableShadowBCF(); executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec); } + + }