Bugfix for incorrect branch diamond merging algorithm

-- Previous version was just incorrectly accumulating information about nodes that were completely eliminated by the common suffix, so we were dropping some reference connections between vertices.  Fixed.  In the process simplified the entire algorithm and codebase
-- Resolves https://jira.broadinstitute.org/browse/GSA-884
This commit is contained in:
Mark DePristo 2013-03-20 15:57:10 -04:00
parent 3a8f001c27
commit 6d7d21ca47
4 changed files with 85 additions and 38 deletions

View File

@ -143,4 +143,18 @@ public class BaseEdge {
return edge2.multiplicity - edge1.multiplicity;
}
}
/**
* Add edge to this edge, updating isRef and multiplicity as appropriate
*
* isRef is simply the or of this and edge
* multiplicity is the sum
*
* @param edge the edge to add
*/
public void add(final BaseEdge edge) {
if ( edge == null ) throw new IllegalArgumentException("edge cannot be null");
this.multiplicity += edge.getMultiplicity();
this.isRef = this.isRef || edge.isRef();
}
}

View File

@ -47,11 +47,11 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.jgrapht.EdgeFactory;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.jgrapht.traverse.DepthFirstIterator;
import java.io.File;
import java.io.FileNotFoundException;
@ -64,7 +64,7 @@ import java.util.*;
* User: rpoplin
* Date: 2/6/13
*/
@Invariant("!this.isAllowingMultipleEdges()")
public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, BaseEdge> {
protected final static Logger logger = Logger.getLogger(BaseGraph.class);
private final int kmerSize;
@ -513,4 +513,19 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
}
return true;
}
/**
* Get the edge between source and target, or null if none is present
*
* Note that since we don't allow multiple edges between vertices there can be at most
* one edge between any two edges
*
* @param source the source vertex for our edge
* @param target the target vertex for our edge
* @return the edge joining source to target, or null if none is present
*/
public BaseEdge getEdge(final T source, final T target) {
final Set<BaseEdge> edges = getAllEdges(source, target);
return edges.isEmpty() ? null : edges.iterator().next();
}
}

View File

@ -51,6 +51,7 @@ import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import java.io.File;
import java.util.*;
/**
@ -149,7 +150,7 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
* Perform as many branch simplifications and merging operations as possible on this graph,
* modifying it in place.
*/
private void mergeBranchingNodes() {
protected void mergeBranchingNodes() {
boolean foundNodesToMerge = true;
while( foundNodesToMerge ) {
foundNodesToMerge = false;
@ -288,61 +289,48 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
final SeqVertex diamondBottom = getDiamondBottom(top);
final Set<SeqVertex> middleVertices = getMiddleVertices(top);
final List<SeqVertex> verticesToRemove = new LinkedList<SeqVertex>();
final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>();
// all of the edges point to the same sink, so it's time to merge
final byte[] commonSuffix = commonSuffixOfEdgeTargets(middleVertices);
if ( commonSuffix != null ) {
boolean newBottomEdgeIsRef = false;
int newBottomEdgeMultiplicity = 0;
final BaseEdge botToNewBottom = new BaseEdge(false, 0);
final BaseEdge elimMiddleNodeEdge = new BaseEdge(false, 0);
final SeqVertex newBottomV = new SeqVertex(commonSuffix);
addVertex(newBottomV);
for ( final SeqVertex middle : middleVertices ) {
boolean missingNodeEdgeIsRef = false;
int missingNodeMultiplicity = 0;
final SeqVertex withoutSuffix = middle.withoutSuffix(commonSuffix);
final BaseEdge topToMiddleEdge = getEdge(top, middle);
final BaseEdge middleToBottomE = getEdge(middle, diamondBottom);
if ( withoutSuffix != null ) // this node is a deletion
// clip out the two edges, since we'll be replacing them later
edgesToRemove.add(topToMiddleEdge);
edgesToRemove.add(middleToBottomE);
if ( withoutSuffix != null ) { // this node is a deletion
addVertex(withoutSuffix);
// update all edges from top -> middle to be top -> without suffix
for( final BaseEdge topToMiddleEdge : getAllEdges(top, middle) ) {
edgesToRemove.add(topToMiddleEdge);
missingNodeMultiplicity += topToMiddleEdge.getMultiplicity();
missingNodeEdgeIsRef = missingNodeEdgeIsRef || topToMiddleEdge.isRef();
if ( withoutSuffix != null ) // this node is a deletion
addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge.isRef(), topToMiddleEdge.getMultiplicity()));
// update edge from top -> middle to be top -> without suffix
addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge));
addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE));
} else {
// this middle node is == the common suffix, wo we're removing the edge
elimMiddleNodeEdge.add(topToMiddleEdge);
}
// reattached prefix to the new bottom V by updating all edges from middleV -> bottom
for ( final BaseEdge middleToBottomE : getAllEdges(middle, diamondBottom) ) {
missingNodeMultiplicity += middleToBottomE.getMultiplicity();
missingNodeEdgeIsRef = missingNodeEdgeIsRef || middleToBottomE.isRef();
if ( withoutSuffix != null ) // this node is a deletion
addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE.isRef(), middleToBottomE.getMultiplicity()));
edgesToRemove.add(middleToBottomE);
// update the info for the new bottom edge
newBottomEdgeIsRef = newBottomEdgeIsRef || middleToBottomE.isRef();
newBottomEdgeMultiplicity += middleToBottomE.getMultiplicity();
}
if ( withoutSuffix == null ) // add an edge from top to new bottom
addEdge(top, newBottomV, new BaseEdge(missingNodeEdgeIsRef, missingNodeMultiplicity));
// include the ref and multi of mid -> bot in our edge from new bot -> bot
botToNewBottom.add(middleToBottomE);
verticesToRemove.add(middle);
}
addEdge(newBottomV, diamondBottom, new BaseEdge(newBottomEdgeIsRef, newBottomEdgeMultiplicity));
// add an edge from top to new bottom, because some middle nodes were removed
if ( elimMiddleNodeEdge.getMultiplicity() > 0 )
addEdge(top, newBottomV, elimMiddleNodeEdge);
addEdge(newBottomV, diamondBottom, botToNewBottom);
removeAllEdges(edgesToRemove);
removeAllVertices(verticesToRemove);
return true;
} else {
return false;

View File

@ -51,6 +51,7 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -311,4 +312,33 @@ public class SeqGraphUnitTest extends BaseTest {
merged.simplifyGraph();
Assert.assertTrue(SeqGraph.graphEquals(merged, expected));
}
// A -> ACT -> C [non-ref]
// A -> ACT -> C [non-ref]
// A -> ACT -> C [ref]
//
// Should become A -> ACT -> C [ref and non-ref edges]
//
@Test
public void testBubbleSameBasesWithRef() {
final SeqGraph graph = new SeqGraph();
final SeqVertex top = new SeqVertex("A");
final SeqVertex mid1 = new SeqVertex("ACT");
final SeqVertex mid2 = new SeqVertex("ACT");
final SeqVertex bot = new SeqVertex("C");
graph.addVertices(top, mid1, mid2, bot);
graph.addEdges(top, mid2, bot);
graph.addEdge(top, mid1, new BaseEdge(true, 1));
graph.addEdge(mid1, bot, new BaseEdge(true, 1));
final SeqGraph expected = new SeqGraph();
expected.addVertices(top, mid1, bot);
expected.addEdge(top, mid1, new BaseEdge(true, 2));
expected.addEdge(mid1, bot, new BaseEdge(true, 2));
final SeqGraph actual = ((SeqGraph)graph.clone());
actual.mergeBranchingNodes();
Assert.assertTrue(BaseGraph.graphEquals(actual, expected));
}
}