Merge pull request #152 from broadinstitute/md_commonsuffix_infinite_loop_bugfix
Critical bugfix for CommonSuffixSplitter to avoid infinite loops
This commit is contained in:
commit
2fb8b61dd0
|
|
@ -90,21 +90,23 @@ public class CommonSuffixSplitter {
|
|||
if ( v == null ) throw new IllegalArgumentException("v cannot be null");
|
||||
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex v " + v);
|
||||
|
||||
final Collection<SeqVertex> toMerge = graph.incomingVerticesOf(v);
|
||||
if ( toMerge.size() < 2 )
|
||||
final Collection<SeqVertex> toSplit = graph.incomingVerticesOf(v);
|
||||
if ( toSplit.size() < 2 )
|
||||
// Can only split at least 2 vertices
|
||||
return false;
|
||||
else if ( ! safeToSplit(graph, v, toMerge) ) {
|
||||
else if ( ! safeToSplit(graph, v, toSplit) ) {
|
||||
return false;
|
||||
} else {
|
||||
final SeqVertex suffixVTemplate = commonSuffix(toMerge);
|
||||
final SeqVertex suffixVTemplate = commonSuffix(toSplit);
|
||||
if ( suffixVTemplate.isEmpty() ) {
|
||||
return false;
|
||||
} else if ( allVerticesAreTheCommonSuffix(suffixVTemplate, toSplit) ) {
|
||||
return false;
|
||||
} else {
|
||||
final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>();
|
||||
|
||||
// graph.printGraph(new File("split.pre_" + v.getSequenceString() + "." + counter + ".dot"), 0);
|
||||
for ( final SeqVertex mid : toMerge ) {
|
||||
for ( final SeqVertex mid : toSplit ) {
|
||||
// create my own copy of the suffix
|
||||
final SeqVertex suffixV = new SeqVertex(suffixVTemplate.getSequence());
|
||||
graph.addVertex(suffixV);
|
||||
|
|
@ -130,7 +132,7 @@ public class CommonSuffixSplitter {
|
|||
}
|
||||
}
|
||||
|
||||
graph.removeAllVertices(toMerge);
|
||||
graph.removeAllVertices(toSplit);
|
||||
graph.removeAllEdges(edgesToRemove);
|
||||
// graph.printGraph(new File("split.post_" + v.getSequenceString() + "." + counter++ + ".dot"), 0);
|
||||
|
||||
|
|
@ -141,6 +143,25 @@ public class CommonSuffixSplitter {
|
|||
|
||||
// private static int counter = 0;
|
||||
|
||||
/**
|
||||
* Would all vertices that we'd split just result in the common suffix?
|
||||
*
|
||||
* That is, suppose we have prefix nodes ABC and ABC. After splitting all of the vertices would
|
||||
* just be ABC again, and we'd enter into an infinite loop.
|
||||
*
|
||||
* @param commonSuffix the common suffix of all vertices in toSplits
|
||||
* @param toSplits the collection of vertices we want to split
|
||||
* @return true if all of the vertices are equal to the common suffix
|
||||
*/
|
||||
private boolean allVerticesAreTheCommonSuffix(final SeqVertex commonSuffix, final Collection<SeqVertex> toSplits) {
|
||||
for ( final SeqVertex toSplit : toSplits ) {
|
||||
if ( toSplit.length() != commonSuffix.length() )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can we safely split up the vertices in toMerge?
|
||||
*
|
||||
|
|
|
|||
|
|
@ -72,6 +72,12 @@ public final class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
*/
|
||||
protected final static int MIN_COMMON_SEQUENCE_TO_MERGE_SOURCE_SINK_VERTICES = 10;
|
||||
|
||||
/**
|
||||
* How many cycles of the graph simplifications algorithms will we run before
|
||||
* thinking something has gone wrong and throw an exception?
|
||||
*/
|
||||
private final static int MAX_REASONABLE_SIMPLIFICATION_CYCLES = 100;
|
||||
|
||||
/**
|
||||
* Construct an empty SeqGraph
|
||||
*/
|
||||
|
|
@ -101,29 +107,56 @@ public final class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
}
|
||||
|
||||
protected void simplifyGraph(final int maxCycles) {
|
||||
boolean didSomeWork;
|
||||
int i = 0;
|
||||
|
||||
// start off with one round of zipping of chains for performance reasons
|
||||
zipLinearChains();
|
||||
do {
|
||||
//logger.info("simplifyGraph iteration " + i);
|
||||
// iterate until we haven't don't anything useful
|
||||
didSomeWork = false;
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".1.dot"), 0);
|
||||
didSomeWork |= new MergeDiamonds().transformUntilComplete();
|
||||
didSomeWork |= new MergeTails().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".2.diamonds_and_tails.dot"), 0);
|
||||
|
||||
didSomeWork |= new SplitCommonSuffices().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".3.split_suffix.dot"), 0);
|
||||
didSomeWork |= new MergeCommonSuffices().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".4.merge_suffix.dot"), 0);
|
||||
SeqGraph prevGraph = null;
|
||||
for( int i = 0; i < maxCycles; i++ ) {
|
||||
if ( i > MAX_REASONABLE_SIMPLIFICATION_CYCLES ) {
|
||||
logger.warn("Infinite loop detected in simpliciation routines. Writing current graph to debugMeMark.dot");
|
||||
printGraph(new File("debugMeMark.dot"), 0);
|
||||
throw new IllegalStateException("Infinite loop detected in simplification routines for kmer graph " + getKmerSize());
|
||||
}
|
||||
|
||||
didSomeWork |= new MergeHeadlessIncomingSources().transformUntilComplete();
|
||||
didSomeWork |= zipLinearChains();
|
||||
i++;
|
||||
} while (didSomeWork && i < maxCycles);
|
||||
final boolean didSomeWork = simplifyGraphOnce(i);
|
||||
if ( ! didSomeWork )
|
||||
// no simplification algorithm could run, so stop
|
||||
break;
|
||||
|
||||
// we get five cycles before we start looking for changes in the graph
|
||||
// by cloning ourselves and then checking for any changes
|
||||
if ( i > 5 ) {
|
||||
// the previous graph and this graph have the same structure, so the simplification
|
||||
// algorithms are looping endless between states. Just break and consider ourselves done
|
||||
if ( prevGraph != null && graphEquals(prevGraph, this) )
|
||||
break;
|
||||
|
||||
prevGraph = (SeqGraph)clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run one full cycle of the graph simplification algorithms
|
||||
* @return true if any algorithms said they did some simplification
|
||||
*/
|
||||
private boolean simplifyGraphOnce(final int iteration) {
|
||||
//logger.info("simplifyGraph iteration " + i);
|
||||
// iterate until we haven't don't anything useful
|
||||
boolean didSomeWork = false;
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".1.dot"), 0);
|
||||
didSomeWork |= new MergeDiamonds().transformUntilComplete();
|
||||
didSomeWork |= new MergeTails().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".2.diamonds_and_tails.dot"), 0);
|
||||
|
||||
didSomeWork |= new SplitCommonSuffices().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".3.split_suffix.dot"), 0);
|
||||
didSomeWork |= new MergeCommonSuffices().transformUntilComplete();
|
||||
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + iteration + ".4.merge_suffix.dot"), 0);
|
||||
|
||||
didSomeWork |= new MergeHeadlessIncomingSources().transformUntilComplete();
|
||||
didSomeWork |= zipLinearChains();
|
||||
return didSomeWork;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -431,15 +464,13 @@ public final class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
*
|
||||
* Performs the transformation:
|
||||
*
|
||||
* { x + S_i + y -> Z }
|
||||
* { x + S_i -> y -> Z }
|
||||
*
|
||||
* goes to:
|
||||
*
|
||||
* { x -> S_i -> y -> Z }
|
||||
* { x -> S_i -> y + Z }
|
||||
*
|
||||
* for all nodes that match this configuration.
|
||||
*
|
||||
* Differs from the diamond transform in that no top node is required
|
||||
*/
|
||||
protected class MergeCommonSuffices extends VertexBasedTransformer {
|
||||
@Override
|
||||
|
|
@ -449,8 +480,6 @@ public final class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Merge headless configurations:
|
||||
*
|
||||
* Performs the transformation:
|
||||
*
|
||||
* { x + S_i + y -> Z }
|
||||
|
|
|
|||
|
|
@ -63,8 +63,8 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6b673efb6f12b5deebb3e63fe94c48ed");
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "490ecf6619740c01c81a463392ef23cf");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("e8466846ca420bcbcd52b97f7a661aa3"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6fd839641ee038048626fbd1154f173"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -102,8 +102,8 @@ public class CommonSuffixSplitterUnitTest extends BaseTest {
|
|||
public void testSplitNoCycles() {
|
||||
final SeqGraph original = new SeqGraph();
|
||||
final SeqVertex v1 = new SeqVertex("A");
|
||||
final SeqVertex v2 = new SeqVertex("C");
|
||||
final SeqVertex v3 = new SeqVertex("C");
|
||||
final SeqVertex v2 = new SeqVertex("AC");
|
||||
final SeqVertex v3 = new SeqVertex("TC");
|
||||
final SeqVertex v4 = new SeqVertex("G");
|
||||
|
||||
original.addVertices(v1, v2, v3, v4);
|
||||
|
|
@ -116,7 +116,7 @@ public class CommonSuffixSplitterUnitTest extends BaseTest {
|
|||
Assert.assertFalse(new CommonSuffixSplitter().split(original, v4), "Cannot split graph with a cycle of the bottom list");
|
||||
}
|
||||
|
||||
@Test(timeOut = 10000)
|
||||
@Test(timeOut = 10000, enabled = !DEBUG)
|
||||
public void testSplitComplexCycle() {
|
||||
final SeqGraph original = new SeqGraph();
|
||||
final SeqVertex r1 = new SeqVertex("ACTG");
|
||||
|
|
@ -130,7 +130,7 @@ public class CommonSuffixSplitterUnitTest extends BaseTest {
|
|||
original.addEdges(r1, cat1, c1, cat2, c1);
|
||||
original.addEdges(r2, c2, cat2);
|
||||
|
||||
original.printGraph(new File("testSplitComplexCycle.dot"), 0);
|
||||
//original.printGraph(new File("testSplitComplexCycle.dot"), 0);
|
||||
|
||||
for ( final SeqVertex v : Arrays.asList(cat2) ) { // original.vertexSet() ) {
|
||||
final SeqGraph graph = (SeqGraph)original.clone();
|
||||
|
|
@ -139,4 +139,32 @@ public class CommonSuffixSplitterUnitTest extends BaseTest {
|
|||
Assert.assertFalse(success, "Shouldn't be able to split any vertices but CommonSuffixSplitter says it could for " + v);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeOut = 10000)
|
||||
public void testSplitInfiniteCycleFailure() {
|
||||
final SeqGraph original = new SeqGraph();
|
||||
final SeqVertex v1 = new SeqVertex("GC");
|
||||
final SeqVertex v2 = new SeqVertex("X");
|
||||
final SeqVertex v3 = new SeqVertex("N");
|
||||
final SeqVertex v4 = new SeqVertex("C");
|
||||
|
||||
original.addVertices(v1, v2, v3, v4);
|
||||
original.addEdge(v1, v2, new BaseEdge(false, 12));
|
||||
original.addEdge(v2, v3, new BaseEdge(false, 23));
|
||||
original.addEdge(v3, v4, new BaseEdge(false, 34));
|
||||
original.addEdge(v4, v2, new BaseEdge(false, 42));
|
||||
|
||||
original.printGraph(new File("testSplitInfiniteCycleFailure.dot"), 0);
|
||||
|
||||
final SeqGraph graph = (SeqGraph)original.clone();
|
||||
final boolean success = new CommonSuffixSplitter().split(graph, v2);
|
||||
Assert.assertTrue(success);
|
||||
|
||||
for ( final SeqVertex v : graph.vertexSet() ) {
|
||||
graph.printGraph(new File("testSplitInfiniteCycleFailure.first_split.dot"), 0);
|
||||
final boolean success2 = new CommonSuffixSplitter().split((SeqGraph)graph.clone(), v);
|
||||
if ( success2 ) graph.printGraph(new File("testSplitInfiniteCycleFailure.fail.dot"), 0);
|
||||
Assert.assertFalse(success2, "Shouldn't be able to split any vertices but CommonSuffixSplitter says it could for " + v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -521,4 +521,22 @@ public class SeqGraphUnitTest extends BaseTest {
|
|||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeOut = 10000)
|
||||
public void testInfiniteCycleFromEmpiricalRuns() {
|
||||
final SeqVertex v1 = new SeqVertex("CCCT");
|
||||
final SeqVertex v2 = new SeqVertex("CATCCTCCCTTCTAGACTTCTCCTCCTCCTCCACCATCCTCCCCTCTAGACTTCTCCTCCTCCTCCACCATCCTCCCCTCTAGACTTCTCCTCCTCCTCC");
|
||||
final SeqVertex v3 = new SeqVertex("CTAGACTTCTCCTCCTCCTCC");
|
||||
final SeqVertex v4 = new SeqVertex("ACCATC");
|
||||
final SeqVertex v5 = new SeqVertex("CCTCCACCATCCTCCCCTCTAGGCTTCTCCTCCTCCTCCACCATCCTCCCCTCTAGACTTCTCCTCCTCCTCCACCATCCTCCCCTCTAGACTTCTCCTCCTCCTCCACCATC");
|
||||
final SeqVertex v6 = new SeqVertex("CTCCCCT");
|
||||
|
||||
final SeqGraph graph = new SeqGraph();
|
||||
graph.addVertices(v1, v2, v3, v4, v5, v6);
|
||||
graph.addEdges(v1, v3, v4, v6, v3);
|
||||
graph.addEdges(v2, v4);
|
||||
graph.addEdges(v5, v6);
|
||||
|
||||
graph.simplifyGraph();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue