HaplotypeCaller infrastructure cleanup and unit testing

-- UnitTest for isRootOfDiamond along with key bugfix detected while testing
-- Fix up the equals methods in BaseEdge.  Now called hasSameSourceAndTarget and seqEquals.  A much more meaningful naming
-- Generalize graphEquals to use seqEquals, so it works equally well with Debruijn and SeqGraphs
-- Add BaseVertex method called seqEquals that returns true if two BaseVertex objects have the same sequence
-- Reorganize SeqGraph mergeNodes into a single master function that does zipping, branch merging, and zipping again, rather than having this done in the DeBruijnAssembler itself
-- Massive expansion of the SeqGraph unit tests.  We now really test out the zipping and branch merging code.
-- Near final cleanup of the current codebase
-- DeBruijnVertex cleanup and optimizations.  Since kmer graphs don't allow sequences longer than the kmer size, the suffix is always a byte, not a byte[].  Optimize the code to make use of this constraint
This commit is contained in:
Mark DePristo 2013-03-19 18:09:23 -04:00
parent 2e36f15861
commit 5226b24a11
9 changed files with 435 additions and 86 deletions

View File

@ -116,14 +116,21 @@ public class BaseEdge {
this.isRef = isRef;
}
// For use when comparing edges pulled from the same graph
public <T extends BaseVertex> boolean equals( final BaseGraph<T> graph, final BaseEdge edge ) {
/**
 * Check whether this edge and another edge connect the same pair of vertices in a graph.
 *
 * Vertices are compared with equals(), so both edges are expected to come from the same graph.
 *
 * @param graph the graph containing both this edge and edge
 * @param edge the edge to compare against
 * @param <T> the vertex type of graph
 * @return true if the two edges have equal source vertices and equal target vertices
 */
public <T extends BaseVertex> boolean hasSameSourceAndTarget(final BaseGraph<T> graph, final BaseEdge edge) {
    final boolean sameSource = graph.getEdgeSource(this).equals(graph.getEdgeSource(edge));
    if ( ! sameSource )
        return false;
    return graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge));
}
// For use when comparing edges across graphs!
public <T extends BaseVertex> boolean equals( final BaseGraph<T> graph, final BaseEdge edge, final BaseGraph<T> graph2 ) {
return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
/**
 * Check whether this edge (living in graph) and edge (living in graph2) connect vertices
 * carrying the same sequences.
 *
 * Vertices are compared with BaseVertex.seqEquals rather than equals(), so the two edges
 * may come from different graphs.
 *
 * @param graph the graph containing this edge
 * @param edge the edge to compare against
 * @param graph2 the graph containing edge
 * @param <T> the vertex type of both graphs
 * @return true if the source vertices are seqEquals and the target vertices are seqEquals
 */
public <T extends BaseVertex> boolean seqEquals( final BaseGraph<T> graph, final BaseEdge edge, final BaseGraph<T> graph2 ) {
    final T mySource = graph.getEdgeSource(this);
    final T otherSource = graph2.getEdgeSource(edge);
    if ( ! mySource.seqEquals(otherSource) )
        return false;

    final T myTarget = graph.getEdgeTarget(this);
    final T otherTarget = graph2.getEdgeTarget(edge);
    return myTarget.seqEquals(otherTarget);
}
/**

View File

@ -310,6 +310,19 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
addVertex(v);
}
/**
 * Convenience function that chains vertices together with edges.
 *
 * Calls addEdge(start, remaining[0]), then addEdge(remaining[0], remaining[1]), and so on.
 * Does nothing when remaining is empty.
 *
 * @param start the first vertex of the chain
 * @param remaining the subsequent vertices of the chain, in order
 */
public void addEdges(final T start, final T ... remaining) {
    T source = start;
    for ( int i = 0; i < remaining.length; i++ ) {
        final T target = remaining[i];
        addEdge(source, target);
        source = target;
    }
}
/**
* Get the set of vertices connected by outgoing edges of V
* @param v a non-null vertex
@ -451,28 +464,50 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
}
}
// for ( final T remove : toRemove )
// logger.info("Cleaning up nodes not attached to any reference node: " + remove.toString());
removeAllVertices(toRemove);
}
}
/**
* Semi-lenient comparison of two graphs, returning true if g1 and g2 have similar structure
*
* By similar this means that both graphs have the same number of vertices, where each vertex can find
* a vertex in the other graph that's seqEqual to it. A similar constraint applies to the edges,
* where all edges in g1 must have a corresponding edge in g2 where both source and target vertices are
* seqEqual
*
* @param g1 the first graph to compare
* @param g2 the second graph to compare
* @param <T> the type of the nodes in those graphs
* @return true if g1 and g2 are equal
*/
public static <T extends BaseVertex> boolean graphEquals(final BaseGraph<T> g1, BaseGraph<T> g2) {
if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) {
final Set<T> vertices1 = g1.vertexSet();
final Set<T> vertices2 = g2.vertexSet();
final Set<BaseEdge> edges1 = g1.edgeSet();
final Set<BaseEdge> edges2 = g2.edgeSet();
if ( vertices1.size() != vertices2.size() || edges1.size() != edges2.size() )
return false;
for ( final T v1 : vertices1 ) {
boolean found = false;
for ( final T v2 : vertices2 )
found = found || v1.getSequenceString().equals(v2.getSequenceString());
if ( ! found ) return false;
}
for( BaseEdge e1 : g1.edgeSet() ) {
for( final BaseEdge e1 : g1.edgeSet() ) {
boolean found = false;
for( BaseEdge e2 : g2.edgeSet() ) {
if( e1.equals(g1, e2, g2) ) { found = true; break; }
if( e1.seqEquals(g1, e2, g2) ) { found = true; break; }
}
if( !found ) { return false; }
}
for( BaseEdge e2 : g2.edgeSet() ) {
for( final BaseEdge e2 : g2.edgeSet() ) {
boolean found = false;
for( BaseEdge e1 : g1.edgeSet() ) {
if( e2.equals(g2, e1, g1) ) { found = true; break; }
if( e2.seqEquals(g2, e1, g1) ) { found = true; break; }
}
if( !found ) { return false; }
}

View File

@ -99,6 +99,16 @@ public class BaseVertex {
return true;
}
/**
 * Compare this vertex to another purely by base sequence.
 *
 * @param b the vertex to compare ourselves to
 * @return true if the byte sequences of this and b are element-wise identical, regardless of
 *         any other attributes that might differentiate the two vertices
 */
public boolean seqEquals(final BaseVertex b) {
    final byte[] mySequence = this.getSequence();
    final byte[] otherSequence = b.getSequence();
    return Arrays.equals(mySequence, otherSequence);
}
@Override
public int hashCode() { // necessary to override here so that graph.containsVertex() works the same way as vertex.equals() as one might expect
return Arrays.hashCode(sequence);

View File

@ -194,15 +194,10 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), PRUNE_FACTOR);
seqGraph.pruneGraph(PRUNE_FACTOR);
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.pruned.dot"), PRUNE_FACTOR);
seqGraph.mergeNodes();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.merged.preclean.dot"), PRUNE_FACTOR);
seqGraph.removeVerticesNotConnectedToRef();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.4.merged.dot"), PRUNE_FACTOR);
seqGraph.mergeBranchingNodes();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.simplified.dot"), PRUNE_FACTOR);
seqGraph.mergeNodes();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.6.simplified.merged.dot"), PRUNE_FACTOR);
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.pruned.dot"), PRUNE_FACTOR);
seqGraph.simplifyGraph();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.merged.dot"), PRUNE_FACTOR);
return seqGraph;
}

View File

@ -47,17 +47,20 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import java.util.Arrays;
/**
* simple node class for storing kmer sequences
*
* User: ebanks
* User: ebanks, mdepristo
* Date: Mar 23, 2011
*/
public class DeBruijnVertex extends BaseVertex {
private final static byte[][] sufficesAsByteArray = new byte[256][];
static {
for ( int i = 0; i < sufficesAsByteArray.length; i++ )
sufficesAsByteArray[i] = new byte[]{(byte)(i & 0xFF)};
}
public DeBruijnVertex( final byte[] sequence ) {
super(sequence);
}
@ -85,17 +88,38 @@ public class DeBruijnVertex extends BaseVertex {
*/
@Ensures({"result != null", "result.length() >= 1"})
public String getSuffixString() {
return new String(getSuffix());
return new String(getSuffixAsArray());
}
@Ensures("result != null")
// TODO this could be replaced with byte as the suffix is guarenteed to be exactly 1 base
public byte[] getSuffix() {
return Arrays.copyOfRange( sequence, getKmer() - 1, sequence.length );
/**
 * Get the single suffix byte of this DeBruijnVertex.
 *
 * This is the byte stored at index getKmer() - 1 of the sequence. For a vertex holding
 * the kmer ACT, getSuffix returns T.
 *
 * @return the suffix byte
 */
public byte getSuffix() {
    final int suffixIndex = getKmer() - 1;
    return sequence[suffixIndex];
}
/**
 * Optimized version that returns a byte[] for the single byte suffix of this vertex without allocating memory.
 *
 * The returned array is a shared, pre-built entry of sufficesAsByteArray and must not be modified
 * by the caller.
 *
 * @return a byte[] that contains 1 byte == getSuffix()
 */
@Ensures({"result != null", "result.length == 1", "result[0] == getSuffix()"})
private byte[] getSuffixAsArray() {
    // byte is signed in Java: mask to 0..255 so that suffix bytes >= 0x80 do not become
    // negative indices into the 256-entry lookup table
    return sufficesAsByteArray[getSuffix() & 0xFF];
}
/**
* {@inheritDoc}
*/
@Override
public byte[] getAdditionalSequence(boolean source) {
return source ? super.getAdditionalSequence(source) : getSuffix();
return source ? super.getAdditionalSequence(source) : getSuffixAsArray();
}
}

View File

@ -254,7 +254,7 @@ class Path<T extends BaseVertex> {
final BubbleStateMachine<T> bsm = new BubbleStateMachine<T>(cigar);
for( final BaseEdge e : getEdges() ) {
if( e.equals(graph, edgesInOrder.getFirst()) ) {
if ( e.hasSameSourceAndTarget(graph, edgesInOrder.getFirst()) ) {
advanceBubbleStateMachine( bsm, graph.getEdgeSource(e), null );
}
advanceBubbleStateMachine( bsm, graph.getEdgeTarget(e), e );

View File

@ -46,6 +46,8 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
@ -77,67 +79,83 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
super(kmer);
}
protected void mergeNodes() {
/**
* Simplify this graph, merging vertices together and restructuring the graph in an
* effort to minimize the number of overall vertices in the graph without changing
* in any way the sequences implied by a complex enumeration of all paths through the graph.
*
* Runs three passes in a fixed order: linear-chain zipping, branch (diamond) merging,
* and then linear-chain zipping once more. The graph is modified in place.
*/
public void simplifyGraph() {
// collapse every simple linear chain of vertices first
zipLinearChains();
// then merge the branching (diamond) structures
mergeBranchingNodes();
// zip again after branch merging -- presumably because merging branches can
// leave behind new linear chains; NOTE(review): confirm
zipLinearChains();
}
/**
* Zip up all of the simple linear chains present in this graph.
*/
protected void zipLinearChains() {
boolean foundNodesToMerge = true;
while( foundNodesToMerge ) {
foundNodesToMerge = false;
for( final BaseEdge e : edgeSet() ) {
final SeqVertex outgoingVertex = getEdgeTarget(e);
final SeqVertex incomingVertex = getEdgeSource(e);
if( !outgoingVertex.equals(incomingVertex)
&& outDegreeOf(incomingVertex) == 1 && inDegreeOf(outgoingVertex) == 1
&& isReferenceNode(incomingVertex) == isReferenceNode(outgoingVertex) ) {
final Set<BaseEdge> outEdges = outgoingEdgesOf(outgoingVertex);
final Set<BaseEdge> inEdges = incomingEdgesOf(incomingVertex);
if( inEdges.size() == 1 && outEdges.size() == 1 ) {
inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
} else if( inEdges.size() == 1 ) {
inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
} else if( outEdges.size() == 1 ) {
outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
}
final SeqVertex addedVertex = new SeqVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSequence()) );
addVertex(addedVertex);
for( final BaseEdge edge : outEdges ) {
addEdge(addedVertex, getEdgeTarget(edge), new BaseEdge(edge.isRef(), edge.getMultiplicity()));
}
for( final BaseEdge edge : inEdges ) {
addEdge(getEdgeSource(edge), addedVertex, new BaseEdge(edge.isRef(), edge.getMultiplicity()));
}
removeVertex(incomingVertex);
removeVertex(outgoingVertex);
foundNodesToMerge = true;
break;
}
}
while( zipOneLinearChain() ) {
// just keep going until zipOneLinearChain says it's done
}
}
//
// X -> ABC -> Y
// -> aBC -> Y
//
// becomes
//
// X -> A -> BCY
// -> a -> BCY
//
public void mergeBranchingNodes() {
/**
 * Find one pair of vertices v1 -> v2 that forms a simple linear chain and merge it into a
 * single vertex v' whose sequence is v1's sequence followed by v2's sequence.
 *
 * A pair qualifies only if v1 and v2 are different vertices, v1's only outgoing edge goes
 * to v2, v2's only incoming edge comes from v1, and both vertices agree on whether they are
 * reference nodes.
 *
 * When such a pair is found, the graph is updated in place: the multiplicities of the edges
 * surrounding the pair are adjusted, v' is added and wired to the former neighbors, and v1 and
 * v2 are removed. At most one pair is merged per call. Otherwise nothing is changed.
 *
 * @return true if a pair of vertices was found and merged, false otherwise
 */
protected boolean zipOneLinearChain() {
    for ( final BaseEdge candidate : edgeSet() ) {
        final SeqVertex target = getEdgeTarget(candidate);
        final SeqVertex source = getEdgeSource(candidate);

        final boolean zippable = !target.equals(source)
                && outDegreeOf(source) == 1 && inDegreeOf(target) == 1
                && isReferenceNode(source) == isReferenceNode(target);
        if ( !zippable )
            continue;

        final Set<BaseEdge> outEdges = outgoingEdgesOf(target);
        final Set<BaseEdge> inEdges = incomingEdgesOf(source);
        final boolean singleIn = inEdges.size() == 1;
        final boolean singleOut = outEdges.size() == 1;
        final int zippedMultiplicity = candidate.getMultiplicity();

        // fold the multiplicity of the edge being zipped away into its neighbors
        if ( singleIn && singleOut ) {
            final BaseEdge onlyIn = inEdges.iterator().next();
            onlyIn.setMultiplicity( onlyIn.getMultiplicity() + ( zippedMultiplicity / 2 ) );
            final BaseEdge onlyOut = outEdges.iterator().next();
            onlyOut.setMultiplicity( onlyOut.getMultiplicity() + ( zippedMultiplicity / 2 ) );
        } else if ( singleIn ) {
            final BaseEdge onlyIn = inEdges.iterator().next();
            onlyIn.setMultiplicity( onlyIn.getMultiplicity() + ( zippedMultiplicity - 1 ) );
        } else if ( singleOut ) {
            final BaseEdge onlyOut = outEdges.iterator().next();
            onlyOut.setMultiplicity( onlyOut.getMultiplicity() + ( zippedMultiplicity - 1 ) );
        }

        // build the merged vertex and reconnect it to the old neighbors
        final byte[] mergedBases = ArrayUtils.addAll(source.getSequence(), target.getSequence());
        final SeqVertex merged = new SeqVertex( mergedBases );
        addVertex(merged);
        for ( final BaseEdge out : outEdges ) {
            addEdge(merged, getEdgeTarget(out), new BaseEdge(out.isRef(), out.getMultiplicity()));
        }
        for ( final BaseEdge in : inEdges ) {
            addEdge(getEdgeSource(in), merged, new BaseEdge(in.isRef(), in.getMultiplicity()));
        }

        removeVertex(source);
        removeVertex(target);
        return true;
    }

    return false;
}
/**
* Perform as many branch simplifications and merging operations as possible on this graph,
* modifying it in place.
*/
private void mergeBranchingNodes() {
boolean foundNodesToMerge = true;
while( foundNodesToMerge ) {
foundNodesToMerge = false;
for( final SeqVertex v : vertexSet() ) {
foundNodesToMerge = simplifyDiamond(v);
foundNodesToMerge = simplifyDiamondIfPossible(v);
if ( foundNodesToMerge )
break;
}
@ -153,8 +171,11 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
* \ | / /
* b
*
* @param v
* @return
* Only returns true if all outgoing edges of v go to vertices that all only connect to
* a single bottom node, and that all middle nodes have only the single edge
*
* @param v the vertex to test if it's the top of a diamond pattern
* @return true if v is the root of a diamond
*/
protected boolean isRootOfDiamond(final SeqVertex v) {
final Set<BaseEdge> ve = outgoingEdgesOf(v);
@ -173,6 +194,7 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
if ( inDegreeOf(mi) != 1 )
return false;
// make sure that all outgoing vertices of mi go only to the bottom node
for ( final SeqVertex mt : outgoingVerticesOf(mi) ) {
if ( bottom == null )
bottom = mt;
@ -181,9 +203,24 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
}
}
// bottom has some connections coming in from other nodes, don't allow
if ( inDegreeOf(bottom) != ve.size() )
return false;
return true;
}
/**
* Return the longest suffix of bases shared among all provided vertices
*
* For example, if the vertices have sequences AC, CC, and ATC, this would return
* a single C. However, for ACC and TCC this would return CC. And for AC and TG this
* would return null;
*
* @param middleVertices a non-empty set of vertices
* @return
*/
@Requires("!middleVertices.isEmpty()")
private byte[] commonSuffixOfEdgeTargets(final Set<SeqVertex> middleVertices) {
final String[] kmers = new String[middleVertices.size()];
@ -196,6 +233,14 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
return commonPrefix.equals("") ? null : StringUtils.reverse(commonPrefix).getBytes();
}
/**
* Get the node that is the bottom of a diamond configuration in the graph starting at top
*
* @param top
* @return
*/
@Requires("top != null")
@Ensures({"result != null"})
private SeqVertex getDiamondBottom(final SeqVertex top) {
final BaseEdge topEdge = outgoingEdgesOf(top).iterator().next();
final SeqVertex middle = getEdgeTarget(topEdge);
@ -203,6 +248,13 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
return getEdgeTarget(middleEdge);
}
/**
* Get the set of vertices that are in the middle of a diamond starting at top
* @param top
* @return
*/
@Requires("top != null")
@Ensures({"result != null", "!result.isEmpty()"})
final Set<SeqVertex> getMiddleVertices(final SeqVertex top) {
final Set<SeqVertex> middles = new HashSet<SeqVertex>();
for ( final BaseEdge topToMiddle : outgoingEdgesOf(top) ) {
@ -211,7 +263,26 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
return middles;
}
private boolean simplifyDiamond(final SeqVertex top) {
/**
* Simplify a diamond configuration in the current graph starting at top, if possible
*
* If top is actually the top of a diamond that can be simplified (i.e., doesn't have any
* random edges or other structure that would cause problems with the transformation), then this code
* performs the following transformation on this graph (modifying it):
*
* A -> M1 -> B, A -> M2 -> B, A -> Mn -> B
*
* becomes
*
* A -> M1' -> B', A -> M2' -> B', A -> Mn' -> B'
*
* where B' is composed of the longest common suffix of all Mi nodes + B, and Mi' are each
* middle vertex without their shared suffix.
*
* @param top a proposed vertex in this graph that might start a diamond (but doesn't have to)
* @return true if top actually starts a diamond and it could be simplified
*/
private boolean simplifyDiamondIfPossible(final SeqVertex top) {
if ( ! isRootOfDiamond(top) )
return false;

View File

@ -58,8 +58,7 @@ public class DeBruijnVertexUnitTest extends BaseTest {
Assert.assertEquals(v.getSequence(), bases);
Assert.assertEquals(v.getSequenceString(), new String(bases));
Assert.assertEquals(v.length(), bases.length);
Assert.assertEquals(v.getSuffix().length, 1);
Assert.assertEquals(v.getSuffix()[0], (byte)'T');
Assert.assertEquals(v.getSuffix(), (byte)'T');
Assert.assertEquals(v.getSuffixString(), "T");
Assert.assertEquals(v.getAdditionalSequence(true), bases);

View File

@ -51,6 +51,10 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SeqGraphUnitTest extends BaseTest {
private class MergeNodesWithNoVariationTestProvider extends TestDataProvider {
public byte[] sequence;
@ -75,7 +79,7 @@ public class SeqGraphUnitTest extends BaseTest {
deBruijnGraph.addKmersToGraph(kmer1, kmer2, false, 1);
}
final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph();
seqGraph.mergeNodes();
seqGraph.simplifyGraph();
return seqGraph;
}
}
@ -103,4 +107,208 @@ public class SeqGraphUnitTest extends BaseTest {
final SeqVertex actualV = actual.vertexSet().iterator().next();
Assert.assertEquals(actualV.getSequence(), cfg.sequence);
}
/**
* Data provider for testIsDiamond.
*
* Each row is { graph, vertex, expected result of graph.isRootOfDiamond(vertex) }.
*/
@DataProvider(name = "IsDiamondData")
public Object[][] makeIsDiamondData() throws Exception {
List<Object[]> tests = new ArrayList<Object[]>();
SeqGraph graph;
SeqVertex pre1, pre2, top, middle1, middle2, middle3, bottom, tail1, tail2;
// base graph: pre1 and pre2 lead into top; top fans out to middle1, middle2 and middle3,
// which all lead into bottom; bottom leads to tail1 and tail2
graph = new SeqGraph();
pre1 = new SeqVertex("ACT");
pre2 = new SeqVertex("AGT");
top = new SeqVertex("A");
middle1 = new SeqVertex("CT");
middle2 = new SeqVertex("CG");
middle3 = new SeqVertex("CA");
bottom = new SeqVertex("AA");
tail1 = new SeqVertex("GC");
tail2 = new SeqVertex("GC");
graph.addVertices(pre1, pre2, top, middle1, middle2, middle3, bottom, tail1, tail2);
graph.addEdges(pre1, top, middle1, bottom, tail1);
graph.addEdges(pre2, top, middle2, bottom, tail1);
graph.addEdges(top, middle3, bottom);
graph.addEdges(bottom, tail2);
// every vertex other than top is expected not to be the root of a diamond
for ( final SeqVertex no : Arrays.asList(pre1, pre2, middle1, middle2, middle3, bottom, tail1, tail2)) {
tests.add(new Object[]{graph, no, false});
}
// top itself is the root of the diamond
tests.add(new Object[]{graph, top, true});
// top gets an extra child that has no edge onward to bottom
final SeqGraph danglingMiddleGraph = (SeqGraph)graph.clone();
final SeqVertex danglingMiddle = new SeqVertex("A");
danglingMiddleGraph.addVertex(danglingMiddle);
danglingMiddleGraph.addEdge(top, danglingMiddle);
tests.add(new Object[]{danglingMiddleGraph, top, false});
// bottom gets an extra incoming edge from a vertex that is not attached to top
final SeqGraph strangerToBottom = (SeqGraph)graph.clone();
final SeqVertex notAttachedToTop = new SeqVertex("A");
strangerToBottom.addVertex(notAttachedToTop);
strangerToBottom.addEdge(notAttachedToTop, bottom);
tests.add(new Object[]{strangerToBottom, top, false});
// middle1 gets an extra incoming edge from a vertex other than top
final SeqGraph strangerToMiddle = (SeqGraph)graph.clone();
final SeqVertex attachedToMiddle = new SeqVertex("A");
strangerToMiddle.addVertex(attachedToMiddle);
strangerToMiddle.addEdge(attachedToMiddle, middle1);
tests.add(new Object[]{strangerToMiddle, top, false});
// middle1 has outgoing edge to non-bottom
final SeqGraph middleExtraOut = (SeqGraph)graph.clone();
final SeqVertex fromMiddle = new SeqVertex("A");
middleExtraOut.addVertex(fromMiddle);
middleExtraOut.addEdge(middle1, fromMiddle);
tests.add(new Object[]{middleExtraOut, top, false});
// top connects to bottom directly as well
{
final SeqGraph topConnectsToBottomToo = new SeqGraph();
final SeqVertex top2 = new SeqVertex("A");
final SeqVertex middle4 = new SeqVertex("C");
final SeqVertex bottom2 = new SeqVertex("G");
topConnectsToBottomToo.addVertices(top2, middle4, bottom2);
topConnectsToBottomToo.addEdges(top2, middle4, bottom2);
topConnectsToBottomToo.addEdges(top2, bottom2);
tests.add(new Object[]{topConnectsToBottomToo, top2, false});
}
return tests.toArray(new Object[][]{});
}
/**
 * Verify that isRootOfDiamond gives the expected answer for vertex v in graph.
 */
@Test(dataProvider = "IsDiamondData", enabled = true)
public void testIsDiamond(final SeqGraph graph, final SeqVertex v, final boolean isRootOfDiamond) {
    final boolean actual = graph.isRootOfDiamond(v);
    Assert.assertEquals(actual, isRootOfDiamond);
}
/**
* Data provider for testMerging.
*
* Each row is { input graph, graph expected after simplifyGraph() has been applied to the input }.
* Most cases are built by growing a single shared graph step by step and snapshotting it with
* clone(), so the order of the statements below matters.
*/
@DataProvider(name = "MergingData")
public Object[][] makeMergingData() throws Exception {
List<Object[]> tests = new ArrayList<Object[]>();
final SeqGraph graph = new SeqGraph();
SeqVertex pre1 = new SeqVertex("ACT");
SeqVertex pre2 = new SeqVertex("AGT");
SeqVertex top = new SeqVertex("A");
SeqVertex middle1 = new SeqVertex("GC");
SeqVertex middle2 = new SeqVertex("TC");
SeqVertex middle3 = new SeqVertex("AC");
SeqVertex middle4 = new SeqVertex("GCAC");
SeqVertex bottom = new SeqVertex("AA");
SeqVertex tail1 = new SeqVertex("GC");
SeqVertex tail2 = new SeqVertex("GC");
// just a single vertex
graph.addVertices(pre1);
tests.add(new Object[]{graph.clone(), graph.clone()});
// pre1 -> top = pre1 + top
{
graph.addVertices(top);
graph.addEdges(pre1, top);
final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString());
final SeqGraph expected = new SeqGraph();
expected.addVertex(pre1_top);
tests.add(new Object[]{graph.clone(), expected.clone()});
}
// pre1 -> top -> middle1 = pre1 + top + middle1
{
graph.addVertices(middle1);
graph.addEdges(top, middle1);
final SeqGraph expected = new SeqGraph();
final SeqVertex pre1_top_middle1 = new SeqVertex(pre1.getSequenceString() + top.getSequenceString() + middle1.getSequenceString());
expected.addVertex(pre1_top_middle1);
tests.add(new Object[]{graph.clone(), expected});
}
// pre1 -> top -> middle1 & top -> middle2 = pre1 + top -> middle1 & -> middle2
{
graph.addVertices(middle2);
graph.addEdges(top, middle2);
final SeqGraph expected = new SeqGraph();
final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString());
expected.addVertices(pre1_top, middle1, middle2);
expected.addEdges(pre1_top, middle1);
expected.addEdges(pre1_top, middle2);
tests.add(new Object[]{graph.clone(), expected});
}
// An actual diamond event to merge!
{
graph.addVertices(bottom);
graph.addEdges(middle1, bottom);
graph.addEdges(middle2, bottom);
final SeqGraph expected = new SeqGraph();
final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString());
// middle1 (GC) and middle2 (TC) share the suffix C, which is expected to move onto bottom
final SeqVertex newMiddle1 = new SeqVertex("G");
final SeqVertex newMiddle2 = new SeqVertex("T");
final SeqVertex newBottom = new SeqVertex("C" + bottom.getSequenceString());
expected.addVertices(pre1_top, newMiddle1, newMiddle2, newBottom);
expected.addEdges(pre1_top, newMiddle1, newBottom);
expected.addEdges(pre1_top, newMiddle2, newBottom);
tests.add(new Object[]{graph.clone(), expected.clone()});
// add a third middle (AC), expected to be reduced to A
graph.addVertices(middle3);
graph.addEdges(top, middle3, bottom);
final SeqVertex newMiddle3 = new SeqVertex("A");
expected.addVertices(newMiddle3);
expected.addEdges(pre1_top, newMiddle3, newBottom);
tests.add(new Object[]{graph.clone(), expected.clone()});
// add a fourth, longer middle (GCAC), expected to be reduced to GCA
graph.addVertices(middle4);
graph.addEdges(top, middle4, bottom);
final SeqVertex newMiddle4 = new SeqVertex("GCA");
expected.addVertices(newMiddle4);
expected.addEdges(pre1_top, newMiddle4, newBottom);
tests.add(new Object[]{graph.clone(), expected.clone()});
}
// diamond where top has two predecessors (pre1, pre2) and bottom has two successors (tail1, tail2):
// only the middle and bottom vertices are expected to change
{
final SeqGraph all = new SeqGraph();
all.addVertices(pre1, pre2, top, middle1, middle2, bottom, tail1, tail2);
all.addEdges(pre1, top, middle1, bottom, tail1);
all.addEdges(pre2, top, middle2, bottom, tail2);
final SeqGraph expected = new SeqGraph();
final SeqVertex newMiddle1 = new SeqVertex("G");
final SeqVertex newMiddle2 = new SeqVertex("T");
final SeqVertex newBottom = new SeqVertex("C" + bottom.getSequenceString());
expected.addVertices(pre1, pre2, top, newMiddle1, newMiddle2, newBottom, tail1, tail2);
expected.addEdges(pre1, top, newMiddle1, newBottom, tail1);
expected.addEdges(pre2, top, newMiddle2, newBottom, tail2);
tests.add(new Object[]{all.clone(), expected.clone()});
}
// test the case where we delete a middle node away because the common sequence is all of its sequence
{
final SeqGraph graph2 = new SeqGraph();
final SeqVertex mytop = new SeqVertex("A");
final SeqVertex mid1 = new SeqVertex("AC");
final SeqVertex mid2 = new SeqVertex("C");
final SeqVertex bot = new SeqVertex("G");
graph2.addVertices(mytop, mid1, mid2, bot);
graph2.addEdges(mytop, mid1, bot);
graph2.addEdges(mytop, mid2, bot);
final SeqGraph expected = new SeqGraph();
final SeqVertex newMid1 = new SeqVertex("A");
final SeqVertex newBottom = new SeqVertex("CG");
expected.addVertices(mytop, newMid1, newBottom);
expected.addEdges(mytop, newMid1, newBottom);
// mid2 (C) is entirely the shared suffix, so the expected graph links mytop straight to the new bottom
expected.addEdges(mytop, newBottom);
tests.add(new Object[]{graph2, expected});
}
return tests.toArray(new Object[][]{});
}
/**
 * Simplify a copy of graph and check that the result is graphEquals to expected.
 */
@Test(dataProvider = "MergingData", enabled = true)
public void testMerging(final SeqGraph graph, final SeqGraph expected) {
    // work on a clone so the provider's input graph is left untouched
    final SeqGraph merged = (SeqGraph)graph.clone();
    merged.simplifyGraph();
    final boolean matchesExpected = SeqGraph.graphEquals(merged, expected);
    Assert.assertTrue(matchesExpected);
}
}