Bugfix for incorrect branch diamond merging algorithm
-- Previous version was just incorrectly accumulating information about nodes that were completely eliminated by the common suffix, so we were dropping some reference connections between vertices. Fixed. In the process simplified the entire algorithm and codebase -- Resolves https://jira.broadinstitute.org/browse/GSA-884
This commit is contained in:
parent
3a8f001c27
commit
6d7d21ca47
|
|
@ -143,4 +143,18 @@ public class BaseEdge {
|
|||
return edge2.multiplicity - edge1.multiplicity;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add edge to this edge, updating isRef and multiplicity as appropriate
|
||||
*
|
||||
* isRef is simply the or of this and edge
|
||||
* multiplicity is the sum
|
||||
*
|
||||
* @param edge the edge to add
|
||||
*/
|
||||
public void add(final BaseEdge edge) {
|
||||
if ( edge == null ) throw new IllegalArgumentException("edge cannot be null");
|
||||
this.multiplicity += edge.getMultiplicity();
|
||||
this.isRef = this.isRef || edge.isRef();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,11 +47,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.jgrapht.EdgeFactory;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
import org.jgrapht.traverse.DepthFirstIterator;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -64,7 +64,7 @@ import java.util.*;
|
|||
* User: rpoplin
|
||||
* Date: 2/6/13
|
||||
*/
|
||||
|
||||
@Invariant("!this.isAllowingMultipleEdges()")
|
||||
public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, BaseEdge> {
|
||||
protected final static Logger logger = Logger.getLogger(BaseGraph.class);
|
||||
private final int kmerSize;
|
||||
|
|
@ -513,4 +513,19 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the edge between source and target, or null if none is present
|
||||
*
|
||||
* Note that since we don't allow multiple edges between vertices there can be at most
|
||||
* one edge between any two edges
|
||||
*
|
||||
* @param source the source vertex for our edge
|
||||
* @param target the target vertex for our edge
|
||||
* @return the edge joining source to target, or null if none is present
|
||||
*/
|
||||
public BaseEdge getEdge(final T source, final T target) {
|
||||
final Set<BaseEdge> edges = getAllEdges(source, target);
|
||||
return edges.isEmpty() ? null : edges.iterator().next();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -149,7 +150,7 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
* Perform as many branch simplifications and merging operations as possible on this graph,
|
||||
* modifying it in place.
|
||||
*/
|
||||
private void mergeBranchingNodes() {
|
||||
protected void mergeBranchingNodes() {
|
||||
boolean foundNodesToMerge = true;
|
||||
while( foundNodesToMerge ) {
|
||||
foundNodesToMerge = false;
|
||||
|
|
@ -288,61 +289,48 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
|
|||
|
||||
final SeqVertex diamondBottom = getDiamondBottom(top);
|
||||
final Set<SeqVertex> middleVertices = getMiddleVertices(top);
|
||||
|
||||
final List<SeqVertex> verticesToRemove = new LinkedList<SeqVertex>();
|
||||
final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>();
|
||||
|
||||
// all of the edges point to the same sink, so it's time to merge
|
||||
final byte[] commonSuffix = commonSuffixOfEdgeTargets(middleVertices);
|
||||
if ( commonSuffix != null ) {
|
||||
boolean newBottomEdgeIsRef = false;
|
||||
int newBottomEdgeMultiplicity = 0;
|
||||
|
||||
final BaseEdge botToNewBottom = new BaseEdge(false, 0);
|
||||
final BaseEdge elimMiddleNodeEdge = new BaseEdge(false, 0);
|
||||
final SeqVertex newBottomV = new SeqVertex(commonSuffix);
|
||||
addVertex(newBottomV);
|
||||
|
||||
for ( final SeqVertex middle : middleVertices ) {
|
||||
boolean missingNodeEdgeIsRef = false;
|
||||
int missingNodeMultiplicity = 0;
|
||||
final SeqVertex withoutSuffix = middle.withoutSuffix(commonSuffix);
|
||||
final BaseEdge topToMiddleEdge = getEdge(top, middle);
|
||||
final BaseEdge middleToBottomE = getEdge(middle, diamondBottom);
|
||||
|
||||
if ( withoutSuffix != null ) // this node is a deletion
|
||||
// clip out the two edges, since we'll be replacing them later
|
||||
edgesToRemove.add(topToMiddleEdge);
|
||||
edgesToRemove.add(middleToBottomE);
|
||||
|
||||
if ( withoutSuffix != null ) { // this node is a deletion
|
||||
addVertex(withoutSuffix);
|
||||
|
||||
// update all edges from top -> middle to be top -> without suffix
|
||||
for( final BaseEdge topToMiddleEdge : getAllEdges(top, middle) ) {
|
||||
edgesToRemove.add(topToMiddleEdge);
|
||||
missingNodeMultiplicity += topToMiddleEdge.getMultiplicity();
|
||||
missingNodeEdgeIsRef = missingNodeEdgeIsRef || topToMiddleEdge.isRef();
|
||||
if ( withoutSuffix != null ) // this node is a deletion
|
||||
addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge.isRef(), topToMiddleEdge.getMultiplicity()));
|
||||
// update edge from top -> middle to be top -> without suffix
|
||||
addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge));
|
||||
addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE));
|
||||
} else {
|
||||
// this middle node is == the common suffix, wo we're removing the edge
|
||||
elimMiddleNodeEdge.add(topToMiddleEdge);
|
||||
}
|
||||
|
||||
// reattached prefix to the new bottom V by updating all edges from middleV -> bottom
|
||||
for ( final BaseEdge middleToBottomE : getAllEdges(middle, diamondBottom) ) {
|
||||
missingNodeMultiplicity += middleToBottomE.getMultiplicity();
|
||||
missingNodeEdgeIsRef = missingNodeEdgeIsRef || middleToBottomE.isRef();
|
||||
|
||||
if ( withoutSuffix != null ) // this node is a deletion
|
||||
addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE.isRef(), middleToBottomE.getMultiplicity()));
|
||||
edgesToRemove.add(middleToBottomE);
|
||||
|
||||
// update the info for the new bottom edge
|
||||
newBottomEdgeIsRef = newBottomEdgeIsRef || middleToBottomE.isRef();
|
||||
newBottomEdgeMultiplicity += middleToBottomE.getMultiplicity();
|
||||
}
|
||||
|
||||
if ( withoutSuffix == null ) // add an edge from top to new bottom
|
||||
addEdge(top, newBottomV, new BaseEdge(missingNodeEdgeIsRef, missingNodeMultiplicity));
|
||||
|
||||
// include the ref and multi of mid -> bot in our edge from new bot -> bot
|
||||
botToNewBottom.add(middleToBottomE);
|
||||
verticesToRemove.add(middle);
|
||||
}
|
||||
|
||||
addEdge(newBottomV, diamondBottom, new BaseEdge(newBottomEdgeIsRef, newBottomEdgeMultiplicity));
|
||||
// add an edge from top to new bottom, because some middle nodes were removed
|
||||
if ( elimMiddleNodeEdge.getMultiplicity() > 0 )
|
||||
addEdge(top, newBottomV, elimMiddleNodeEdge);
|
||||
|
||||
addEdge(newBottomV, diamondBottom, botToNewBottom);
|
||||
|
||||
removeAllEdges(edgesToRemove);
|
||||
removeAllVertices(verticesToRemove);
|
||||
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -311,4 +312,33 @@ public class SeqGraphUnitTest extends BaseTest {
|
|||
merged.simplifyGraph();
|
||||
Assert.assertTrue(SeqGraph.graphEquals(merged, expected));
|
||||
}
|
||||
|
||||
// A -> ACT -> C [non-ref]
|
||||
// A -> ACT -> C [non-ref]
|
||||
// A -> ACT -> C [ref]
|
||||
//
|
||||
// Should become A -> ACT -> C [ref and non-ref edges]
|
||||
//
|
||||
@Test
|
||||
public void testBubbleSameBasesWithRef() {
|
||||
final SeqGraph graph = new SeqGraph();
|
||||
final SeqVertex top = new SeqVertex("A");
|
||||
final SeqVertex mid1 = new SeqVertex("ACT");
|
||||
final SeqVertex mid2 = new SeqVertex("ACT");
|
||||
final SeqVertex bot = new SeqVertex("C");
|
||||
graph.addVertices(top, mid1, mid2, bot);
|
||||
graph.addEdges(top, mid2, bot);
|
||||
graph.addEdge(top, mid1, new BaseEdge(true, 1));
|
||||
graph.addEdge(mid1, bot, new BaseEdge(true, 1));
|
||||
|
||||
final SeqGraph expected = new SeqGraph();
|
||||
expected.addVertices(top, mid1, bot);
|
||||
expected.addEdge(top, mid1, new BaseEdge(true, 2));
|
||||
expected.addEdge(mid1, bot, new BaseEdge(true, 2));
|
||||
|
||||
final SeqGraph actual = ((SeqGraph)graph.clone());
|
||||
actual.mergeBranchingNodes();
|
||||
|
||||
Assert.assertTrue(BaseGraph.graphEquals(actual, expected));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue