From be66049a6fd9ce7eb1494d40c57d9d1a20dd151b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 21 Apr 2013 15:42:26 -0400 Subject: [PATCH] Bugfix for CommonSuffixSplitter -- The problem is that the common suffix splitter could eliminate the reference source vertex when there's an incoming node that contains all of the reference source vertex bases and then some additional prefix bases. In this case we'd eliminate the reference source vertex. Fixed by checking for this condition and aborting the simplification -- Update MD5s, including minor improvements --- .../graphs/CommonSuffixSplitter.java | 17 +++++++++++++++++ ...mplexAndSymbolicVariantsIntegrationTest.java | 6 +++--- .../HaplotypeCallerIntegrationTest.java | 4 ++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java index e37fbb281..0665186c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java @@ -100,6 +100,8 @@ public class CommonSuffixSplitter { final SeqVertex suffixVTemplate = commonSuffix(toSplit); if ( suffixVTemplate.isEmpty() ) { return false; + } else if ( wouldEliminateRefSource(graph, suffixVTemplate, toSplit) ) { + return false; } else if ( allVerticesAreTheCommonSuffix(suffixVTemplate, toSplit) ) { return false; } else { @@ -141,6 +143,21 @@ public class CommonSuffixSplitter { } } + /** + * Would factoring out this suffix result in elimating the reference source vertex? + * @param graph the graph + * @param commonSuffix the common suffix of all toSplits + * @param toSplits the list of vertices we're are trying to split + * @return true if toSplit contains the reference source and this ref source has all and only the bases of commonSuffix + */ + private boolean wouldEliminateRefSource(final SeqGraph graph, final SeqVertex commonSuffix, final Collection toSplits) { + for ( final SeqVertex toSplit : toSplits ) { + if ( graph.isRefSource(toSplit) ) + return toSplit.length() == commonSuffix.length(); + } + return false; + } + // private static int counter = 0; /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 57d8aa92c..17f04971b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "57e13aed8dc483514ac15fb757aee1d1"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd51f8c7235eb6547b678093c7a01089"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "d89c8a32e9c54f66e0331382cac86b27"); + "ed3b577e6f7d68bba6774a62d9df9cd9"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "89a28d4290523dd55117bc4e44212d73"); + "a594a28d8053c3e969c39de81a9d03d6"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 4e291cb59..500db6ae9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "3e2e4a62c6c60d432fa1ca32aee2635b"); + "28c3b1f276ec8198801aafe880e40fb6"); } @Test @@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ab518ae32535714604a4ffc71fe42511")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cac0d88fa4471c7a0ac96533a9a6354b")); executeTest("HCTestStructuralIndels: ", spec); }