From c191d7de8c15fac2a23cbc7e86bd2cf369906a34 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Apr 2013 09:20:34 -0400 Subject: [PATCH] Critical bugfix for CommonSuffixSplitter -- Graphs with cycles from the bottom node to one of the middle nodes would introduce an infinite cycle in the algorithm. Created unit test that reproduced the issue, and then fixed the underlying issue. --- .../graphs/CommonSuffixSplitter.java | 7 +++- .../graphs/CommonSuffixMergerUnitTest.java | 9 +++++ .../graphs/CommonSuffixSplitterUnitTest.java | 35 +++++++++++++++++-- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java index f3a41ee8b..dabfbb322 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Requires; +import java.io.File; import java.util.*; /** @@ -150,10 +151,14 @@ public class CommonSuffixSplitter { * @return true if we can safely split up toMerge */ private boolean safeToSplit(final SeqGraph graph, final SeqVertex bot, final Collection toMerge) { + final Set outgoingOfBot = new HashSet(graph.outgoingVerticesOf(bot)); for ( final SeqVertex m : toMerge ) { final Set outs = graph.outgoingEdgesOf(m); if ( m == bot || outs.size() != 1 || ! graph.outgoingVerticesOf(m).contains(bot) ) - // m == bot => don't allow cycles in the graph + // m == bot => don't allow self cycles in the graph + return false; + if ( outgoingOfBot.contains(m) ) + // forbid cycles from bottom -> mid return false; } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java index 012add769..8682ae5e4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java @@ -72,6 +72,15 @@ public class CommonSuffixMergerUnitTest extends BaseTest { this.v = v; this.commonSuffix = commonSuffix; } + + @Override + public String toString() { + return "SplitMergeData{" + + "graph=" + graph + + ", v=" + v + + ", commonSuffix='" + commonSuffix + '\'' + + '}'; + } } public static Object[][] makeSplitMergeData(final int maxTests) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java index f03dc8762..8006cb18d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitterUnitTest.java @@ -51,13 +51,18 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; +import java.util.Arrays; + public class CommonSuffixSplitterUnitTest extends BaseTest { + private final static boolean DEBUG = false; + @DataProvider(name = "SplitData") public Object[][] makeSplitData() { return CommonSuffixMergerUnitTest.makeSplitMergeData(-1); } - @Test(dataProvider = "SplitData") + @Test(dataProvider = "SplitData", enabled = !DEBUG) public void testSplit(final CommonSuffixMergerUnitTest.SplitMergeData data) { final boolean expectedMerge = ! data.commonSuffix.isEmpty() && data.graph.inDegreeOf(data.v) > 1; @@ -74,7 +79,7 @@ public class CommonSuffixSplitterUnitTest extends BaseTest { CommonSuffixMergerUnitTest.assertSameHaplotypes(String.format("suffixSplit.%s.%d", data.commonSuffix, data.graph.vertexSet().size()), data.graph, original); } - @Test + @Test(enabled = !DEBUG) public void testSplitPrevHaveMultipleEdges() { final SeqGraph original = new SeqGraph(); final SeqVertex v1 = new SeqVertex("A"); @@ -93,7 +98,7 @@ public class CommonSuffixSplitterUnitTest extends BaseTest { Assert.assertFalse(new CommonSuffixSplitter().split(original, v3), "Cannot split graph with multiple outgoing edges from middle nodes"); } - @Test + @Test(enabled = !DEBUG) public void testSplitNoCycles() { final SeqGraph original = new SeqGraph(); final SeqVertex v1 = new SeqVertex("A"); @@ -110,4 +115,28 @@ public class CommonSuffixSplitterUnitTest extends BaseTest { original.addEdges(v4, v4); Assert.assertFalse(new CommonSuffixSplitter().split(original, v4), "Cannot split graph with a cycle of the bottom list"); } + + @Test(timeOut = 10000) + public void testSplitComplexCycle() { + final SeqGraph original = new SeqGraph(); + final SeqVertex r1 = new SeqVertex("ACTG"); + final SeqVertex r2 = new SeqVertex("ATGC"); + final SeqVertex cat1 = new SeqVertex("CAT"); + final SeqVertex cat2 = new SeqVertex("CAT"); + final SeqVertex c1 = new SeqVertex("C"); + final SeqVertex c2 = new SeqVertex("C"); + + original.addVertices(r1, r2, cat1, cat2, c1, c2); + original.addEdges(r1, cat1, c1, cat2, c1); + original.addEdges(r2, c2, cat2); + + original.printGraph(new File("testSplitComplexCycle.dot"), 0); + + for ( final SeqVertex v : Arrays.asList(cat2) ) { // original.vertexSet() ) { + final SeqGraph graph = (SeqGraph)original.clone(); + final boolean success = new CommonSuffixSplitter().split(graph, v); + if ( success ) graph.printGraph(new File("testSplitComplexCycle.fail.dot"), 0); + Assert.assertFalse(success, "Shouldn't be able to split any vertices but CommonSuffixSplitter says it could for " + v); + } + } }