diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java index 053f0e1a1..7b5fd2bbd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java @@ -116,14 +116,21 @@ public class BaseEdge { this.isRef = isRef; } - // For use when comparing edges pulled from the same graph - public boolean equals( final BaseGraph graph, final BaseEdge edge ) { + /** + * Does this and edge have the same source and target vertices in graph? + * + * @param graph the graph containing both this and edge + * @param edge our comparator edge + * @param + * @return true if we have the same source and target vertices + */ + public boolean hasSameSourceAndTarget(final BaseGraph graph, final BaseEdge edge) { return (graph.getEdgeSource(this).equals(graph.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph.getEdgeTarget(edge))); } // For use when comparing edges across graphs! 
- public boolean equals( final BaseGraph graph, final BaseEdge edge, final BaseGraph graph2 ) { - return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge))); + public boolean seqEquals( final BaseGraph graph, final BaseEdge edge, final BaseGraph graph2 ) { + return (graph.getEdgeSource(this).seqEquals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).seqEquals(graph2.getEdgeTarget(edge))); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java index 6aa687312..ec5c99bb1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java @@ -310,6 +310,19 @@ public class BaseGraph extends DefaultDirectedGraph extends DefaultDirectedGraph the type of the nodes in those graphs + * @return true if g1 and g2 are equals + */ public static boolean graphEquals(final BaseGraph g1, BaseGraph g2) { - if( !(g1.vertexSet().containsAll(g2.vertexSet()) && g2.vertexSet().containsAll(g1.vertexSet())) ) { + final Set vertices1 = g1.vertexSet(); + final Set vertices2 = g2.vertexSet(); + final Set edges1 = g1.edgeSet(); + final Set edges2 = g2.edgeSet(); + + if ( vertices1.size() != vertices2.size() || edges1.size() != edges2.size() ) return false; + + for ( final T v1 : vertices1 ) { + boolean found = false; + for ( final T v2 : vertices2 ) + found = found || v1.getSequenceString().equals(v2.getSequenceString()); + if ( ! 
found ) return false; } - for( BaseEdge e1 : g1.edgeSet() ) { + + for( final BaseEdge e1 : g1.edgeSet() ) { boolean found = false; for( BaseEdge e2 : g2.edgeSet() ) { - if( e1.equals(g1, e2, g2) ) { found = true; break; } + if( e1.seqEquals(g1, e2, g2) ) { found = true; break; } } if( !found ) { return false; } } - for( BaseEdge e2 : g2.edgeSet() ) { + for( final BaseEdge e2 : g2.edgeSet() ) { boolean found = false; for( BaseEdge e1 : g1.edgeSet() ) { - if( e2.equals(g2, e1, g1) ) { found = true; break; } + if( e2.seqEquals(g2, e1, g1) ) { found = true; break; } } if( !found ) { return false; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseVertex.java index fad7a51d1..b6d278105 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseVertex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseVertex.java @@ -99,6 +99,16 @@ public class BaseVertex { return true; } + /** + * Are b and this equal according to their base sequences? 
+ * + * @param b the vertex to compare ourselves to + * @return true if b and this have the same sequence, regardless of other attributes that might differentiate them + */ + public boolean seqEquals(final BaseVertex b) { + return Arrays.equals(this.getSequence(), b.getSequence()); + } + @Override public int hashCode() { // necessary to override here so that graph.containsVertex() works the same way as vertex.equals() as one might expect return Arrays.hashCode(sequence); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 688d5336e..6d295ff97 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -194,15 +194,10 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph(); if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.1.dot"), PRUNE_FACTOR); seqGraph.pruneGraph(PRUNE_FACTOR); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.pruned.dot"), PRUNE_FACTOR); - seqGraph.mergeNodes(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.merged.preclean.dot"), PRUNE_FACTOR); seqGraph.removeVerticesNotConnectedToRef(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.4.merged.dot"), PRUNE_FACTOR); - seqGraph.mergeBranchingNodes(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.simplified.dot"), PRUNE_FACTOR); - seqGraph.mergeNodes(); - if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.6.simplified.merged.dot"), PRUNE_FACTOR); + if ( debugGraphTransformations ) seqGraph.printGraph(new 
File("sequenceGraph.2.pruned.dot"), PRUNE_FACTOR); + seqGraph.simplifyGraph(); + if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.3.merged.dot"), PRUNE_FACTOR); return seqGraph; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java index 47716b7c5..0a2c26ca4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertex.java @@ -47,17 +47,20 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; - -import java.util.Arrays; /** * simple node class for storing kmer sequences * - * User: ebanks + * User: ebanks, mdepristo * Date: Mar 23, 2011 */ public class DeBruijnVertex extends BaseVertex { + private final static byte[][] sufficesAsByteArray = new byte[256][]; + static { + for ( int i = 0; i < sufficesAsByteArray.length; i++ ) + sufficesAsByteArray[i] = new byte[]{(byte)(i & 0xFF)}; + } + public DeBruijnVertex( final byte[] sequence ) { super(sequence); } @@ -85,17 +88,38 @@ public class DeBruijnVertex extends BaseVertex { */ @Ensures({"result != null", "result.length() >= 1"}) public String getSuffixString() { - return new String(getSuffix()); + return new String(getSuffixAsArray()); } - @Ensures("result != null") - // TODO this could be replaced with byte as the suffix is guarenteed to be exactly 1 base - public byte[] getSuffix() { - return Arrays.copyOfRange( sequence, getKmer() - 1, sequence.length ); + /** + * Get the suffix byte of this DeBruijnVertex + * + * The suffix byte is simply the last byte of the kmer sequence, so if this is holding sequence ACT + * getSuffix would return T + * + * @return a byte + */ + public byte getSuffix() { + return 
sequence[getKmer() - 1]; } + /** + * Optimized version that returns a byte[] for the single byte suffix of this graph without allocating memory. + * + * Should not be modified + * + * @return a byte[] that contains 1 byte == getSuffix() + */ + @Ensures({"result != null", "result.length == 1", "result[0] == getSuffix()"}) + private byte[] getSuffixAsArray() { + return sufficesAsByteArray[getSuffix()]; + } + + /** + * {@inheritDoc} + */ @Override public byte[] getAdditionalSequence(boolean source) { - return source ? super.getAdditionalSequence(source) : getSuffix(); + return source ? super.getAdditionalSequence(source) : getSuffixAsArray(); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Path.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Path.java index 895cffcca..7546155a6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Path.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/Path.java @@ -254,7 +254,7 @@ class Path { final BubbleStateMachine bsm = new BubbleStateMachine(cigar); for( final BaseEdge e : getEdges() ) { - if( e.equals(graph, edgesInOrder.getFirst()) ) { + if ( e.hasSameSourceAndTarget(graph, edgesInOrder.getFirst()) ) { advanceBubbleStateMachine( bsm, graph.getEdgeSource(e), null ); } advanceBubbleStateMachine( bsm, graph.getEdgeTarget(e), e ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java index 960f2cdd7..f67815b92 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java @@ -46,6 +46,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; 
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; @@ -77,67 +79,83 @@ public class SeqGraph extends BaseGraph { super(kmer); } - protected void mergeNodes() { + /** + * Simplify this graph, merging vertices together and restructuring the graph in an + * effort to minimize the number of overall vertices in the graph without changing + * in any way the sequences implied by a complex enumeration of all paths through the graph. + */ + public void simplifyGraph() { + zipLinearChains(); + mergeBranchingNodes(); zipLinearChains(); } + /** + * Zip up all of the simple linear chains present in this graph. + */ protected void zipLinearChains() { - boolean foundNodesToMerge = true; - while( foundNodesToMerge ) { - foundNodesToMerge = false; - - for( final BaseEdge e : edgeSet() ) { - final SeqVertex outgoingVertex = getEdgeTarget(e); - final SeqVertex incomingVertex = getEdgeSource(e); - if( !outgoingVertex.equals(incomingVertex) - && outDegreeOf(incomingVertex) == 1 && inDegreeOf(outgoingVertex) == 1 - && isReferenceNode(incomingVertex) == isReferenceNode(outgoingVertex) ) { - - final Set outEdges = outgoingEdgesOf(outgoingVertex); - final Set inEdges = incomingEdgesOf(incomingVertex); - if( inEdges.size() == 1 && outEdges.size() == 1 ) { - inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) ); - outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) ); - } else if( inEdges.size() == 1 ) { - inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) ); - } else if( outEdges.size() == 1 ) { - outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) ); - } - - final SeqVertex addedVertex = new SeqVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSequence()) ); - 
addVertex(addedVertex); - for( final BaseEdge edge : outEdges ) { - addEdge(addedVertex, getEdgeTarget(edge), new BaseEdge(edge.isRef(), edge.getMultiplicity())); - } - for( final BaseEdge edge : inEdges ) { - addEdge(getEdgeSource(edge), addedVertex, new BaseEdge(edge.isRef(), edge.getMultiplicity())); - } - - removeVertex(incomingVertex); - removeVertex(outgoingVertex); - foundNodesToMerge = true; - break; - } - } + while( zipOneLinearChain() ) { + // just keep going until zipOneLinearChain says it's done } } - // - // X -> ABC -> Y - // -> aBC -> Y - // - // becomes - // - // X -> A -> BCY - // -> a -> BCY - // - public void mergeBranchingNodes() { + /** + * Merge together two vertices in the graph v1 -> v2 into a single vertex v' containing v1 + v2 sequence + * + * Only works on vertices where v1's only outgoing edge is to v2 and v2's only incoming edge is from v1. + * + * If such a pair of vertices is found, they are merged and the graph is updated. Otherwise nothing is changed. + * + * @return true if any such pair of vertices could be found, false otherwise + */ + protected boolean zipOneLinearChain() { + for( final BaseEdge e : edgeSet() ) { + final SeqVertex outgoingVertex = getEdgeTarget(e); + final SeqVertex incomingVertex = getEdgeSource(e); + if( !outgoingVertex.equals(incomingVertex) + && outDegreeOf(incomingVertex) == 1 && inDegreeOf(outgoingVertex) == 1 + && isReferenceNode(incomingVertex) == isReferenceNode(outgoingVertex) ) { + + final Set outEdges = outgoingEdgesOf(outgoingVertex); + final Set inEdges = incomingEdgesOf(incomingVertex); + if( inEdges.size() == 1 && outEdges.size() == 1 ) { + inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) ); + outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) ); + } else if( inEdges.size() == 1 ) { + inEdges.iterator().next().setMultiplicity( 
inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) ); + } else if( outEdges.size() == 1 ) { + outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) ); + } + + final SeqVertex addedVertex = new SeqVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSequence()) ); + addVertex(addedVertex); + for( final BaseEdge edge : outEdges ) { + addEdge(addedVertex, getEdgeTarget(edge), new BaseEdge(edge.isRef(), edge.getMultiplicity())); + } + for( final BaseEdge edge : inEdges ) { + addEdge(getEdgeSource(edge), addedVertex, new BaseEdge(edge.isRef(), edge.getMultiplicity())); + } + + removeVertex(incomingVertex); + removeVertex(outgoingVertex); + return true; + } + } + + return false; + } + + /** + * Perform as many branch simplifications and merging operations as possible on this graph, + * modifying it in place. + */ + private void mergeBranchingNodes() { boolean foundNodesToMerge = true; while( foundNodesToMerge ) { foundNodesToMerge = false; for( final SeqVertex v : vertexSet() ) { - foundNodesToMerge = simplifyDiamond(v); + foundNodesToMerge = simplifyDiamondIfPossible(v); if ( foundNodesToMerge ) break; } @@ -153,8 +171,11 @@ public class SeqGraph extends BaseGraph { * \ | / / * b * - * @param v - * @return + * Only returns true if all outgoing edges of v go to vertices that all only connect to + * a single bottom node, and that all middle nodes have only the single edge + * + * @param v the vertex to test if it's the top of a diamond pattern + * @return true if v is the root of a diamond */ protected boolean isRootOfDiamond(final SeqVertex v) { final Set ve = outgoingEdgesOf(v); @@ -173,6 +194,7 @@ public class SeqGraph extends BaseGraph { if ( inDegreeOf(mi) != 1 ) return false; + // make sure that all outgoing vertices of mi go only to the bottom node for ( final SeqVertex mt : outgoingVerticesOf(mi) ) { if ( bottom == null ) bottom = mt; @@ -181,9 +203,24 @@ 
public class SeqGraph extends BaseGraph { } } + // bottom has some connections coming in from other nodes, don't allow + if ( inDegreeOf(bottom) != ve.size() ) + return false; + return true; } + /** + * Return the longest suffix of bases shared among all provided vertices + * + * For example, if the vertices have sequences AC, CC, and ATC, this would return + * a single C. However, for ACC and TCC this would return CC. And for AC and TG this + * would return null; + * + * @param middleVertices a non-empty set of vertices + * @return the shared suffix as bytes, or null if the vertices have no common suffix + */ + @Requires("!middleVertices.isEmpty()") private byte[] commonSuffixOfEdgeTargets(final Set middleVertices) { final String[] kmers = new String[middleVertices.size()]; @@ -196,6 +233,14 @@ public class SeqGraph extends BaseGraph { return commonPrefix.equals("") ? null : StringUtils.reverse(commonPrefix).getBytes(); } + /** + * Get the node that is the bottom of a diamond configuration in the graph starting at top + * + * @param top the vertex at the top of the diamond + * @return the vertex at the bottom of the diamond + */ + @Requires("top != null") + @Ensures({"result != null"}) private SeqVertex getDiamondBottom(final SeqVertex top) { final BaseEdge topEdge = outgoingEdgesOf(top).iterator().next(); final SeqVertex middle = getEdgeTarget(topEdge); @@ -203,6 +248,13 @@ public class SeqGraph extends BaseGraph { return getEdgeTarget(middleEdge); } + /** + * Get the set of vertices that are in the middle of a diamond starting at top + * @param top the vertex at the top of the diamond + * @return the non-empty set of middle vertices + */ + @Requires("top != null") + @Ensures({"result != null", "!result.isEmpty()"}) final Set getMiddleVertices(final SeqVertex top) { final Set middles = new HashSet(); for ( final BaseEdge topToMiddle : outgoingEdgesOf(top) ) { @@ -211,7 +263,26 @@ public class SeqGraph extends BaseGraph { return middles; } - private boolean simplifyDiamond(final SeqVertex top) { + /** + * Simplify a diamond configuration in the current graph starting at top, if possible + * + * If top is actually the top of a diamond that can be simplified (i.e., doesn't have any + * 
random edges or other structure that would cause problems with the transformation), then this code + * performs the following transformation on this graph (modifying it): + * + * A -> M1 -> B, A -> M2 -> B, A -> Mn -> B + * + * becomes + * + * A -> M1' -> B', A -> M2' -> B', A -> Mn' -> B' + * + * where B' is composed of the longest common suffix of all Mi nodes + B, and Mi' are each + * middle vertex without their shared suffix. + * + * @param top a proposed vertex in this graph that might start a diamond (but doesn't have to) + * @return true if top actually starts a diamond and it could be simplified + */ + private boolean simplifyDiamondIfPossible(final SeqVertex top) { if ( ! isRootOfDiamond(top) ) return false; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertexUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertexUnitTest.java index 2db35e173..dfbe50668 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertexUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnVertexUnitTest.java @@ -58,8 +58,7 @@ public class DeBruijnVertexUnitTest extends BaseTest { Assert.assertEquals(v.getSequence(), bases); Assert.assertEquals(v.getSequenceString(), new String(bases)); Assert.assertEquals(v.length(), bases.length); - Assert.assertEquals(v.getSuffix().length, 1); - Assert.assertEquals(v.getSuffix()[0], (byte)'T'); + Assert.assertEquals(v.getSuffix(), (byte)'T'); Assert.assertEquals(v.getSuffixString(), "T"); Assert.assertEquals(v.getAdditionalSequence(true), bases); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java index b5089e878..c63996d66 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java @@ -51,6 +51,10 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + public class SeqGraphUnitTest extends BaseTest { private class MergeNodesWithNoVariationTestProvider extends TestDataProvider { public byte[] sequence; @@ -75,7 +79,7 @@ public class SeqGraphUnitTest extends BaseTest { deBruijnGraph.addKmersToGraph(kmer1, kmer2, false, 1); } final SeqGraph seqGraph = deBruijnGraph.convertToSequenceGraph(); - seqGraph.mergeNodes(); + seqGraph.simplifyGraph(); return seqGraph; } } @@ -103,4 +107,208 @@ public class SeqGraphUnitTest extends BaseTest { final SeqVertex actualV = actual.vertexSet().iterator().next(); Assert.assertEquals(actualV.getSequence(), cfg.sequence); } + + @DataProvider(name = "IsDiamondData") + public Object[][] makeIsDiamondData() throws Exception { + List tests = new ArrayList(); + + SeqGraph graph; + SeqVertex pre1, pre2, top, middle1, middle2, middle3, bottom, tail1, tail2; + + graph = new SeqGraph(); + + pre1 = new SeqVertex("ACT"); + pre2 = new SeqVertex("AGT"); + top = new SeqVertex("A"); + middle1 = new SeqVertex("CT"); + middle2 = new SeqVertex("CG"); + middle3 = new SeqVertex("CA"); + bottom = new SeqVertex("AA"); + tail1 = new SeqVertex("GC"); + tail2 = new SeqVertex("GC"); + + graph.addVertices(pre1, pre2, top, middle1, middle2, middle3, bottom, tail1, tail2); + graph.addEdges(pre1, top, middle1, bottom, tail1); + graph.addEdges(pre2, top, middle2, bottom, tail1); + graph.addEdges(top, middle3, bottom); + graph.addEdges(bottom, tail2); + + for ( final SeqVertex no : Arrays.asList(pre1, pre2, middle1, middle2, middle3, bottom, tail1, tail2)) { + tests.add(new Object[]{graph, no, false}); + } + 
tests.add(new Object[]{graph, top, true}); + + final SeqGraph danglingMiddleGraph = (SeqGraph)graph.clone(); + final SeqVertex danglingMiddle = new SeqVertex("A"); + danglingMiddleGraph.addVertex(danglingMiddle); + danglingMiddleGraph.addEdge(top, danglingMiddle); + tests.add(new Object[]{danglingMiddleGraph, top, false}); + + final SeqGraph strangerToBottom = (SeqGraph)graph.clone(); + final SeqVertex notAttachedToTop = new SeqVertex("A"); + strangerToBottom.addVertex(notAttachedToTop); + strangerToBottom.addEdge(notAttachedToTop, bottom); + tests.add(new Object[]{strangerToBottom, top, false}); + + final SeqGraph strangerToMiddle = (SeqGraph)graph.clone(); + final SeqVertex attachedToMiddle = new SeqVertex("A"); + strangerToMiddle.addVertex(attachedToMiddle); + strangerToMiddle.addEdge(attachedToMiddle, middle1); + tests.add(new Object[]{strangerToMiddle, top, false}); + + // middle1 has outgoing edge to non-bottom + final SeqGraph middleExtraOut = (SeqGraph)graph.clone(); + final SeqVertex fromMiddle = new SeqVertex("A"); + middleExtraOut.addVertex(fromMiddle); + middleExtraOut.addEdge(middle1, fromMiddle); + tests.add(new Object[]{middleExtraOut, top, false}); + + // top connects to bottom directly as well + { + final SeqGraph topConnectsToBottomToo = new SeqGraph(); + final SeqVertex top2 = new SeqVertex("A"); + final SeqVertex middle4 = new SeqVertex("C"); + final SeqVertex bottom2 = new SeqVertex("G"); + topConnectsToBottomToo.addVertices(top2, middle4, bottom2); + topConnectsToBottomToo.addEdges(top2, middle4, bottom2); + topConnectsToBottomToo.addEdges(top2, bottom2); + tests.add(new Object[]{topConnectsToBottomToo, top2, false}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "IsDiamondData", enabled = true) + public void testIsDiamond(final SeqGraph graph, final SeqVertex v, final boolean isRootOfDiamond) { + Assert.assertEquals(graph.isRootOfDiamond(v), isRootOfDiamond); + } + + @DataProvider(name = "MergingData") + public 
Object[][] makeMergingData() throws Exception { + List tests = new ArrayList(); + + final SeqGraph graph = new SeqGraph(); + + SeqVertex pre1 = new SeqVertex("ACT"); + SeqVertex pre2 = new SeqVertex("AGT"); + SeqVertex top = new SeqVertex("A"); + SeqVertex middle1 = new SeqVertex("GC"); + SeqVertex middle2 = new SeqVertex("TC"); + SeqVertex middle3 = new SeqVertex("AC"); + SeqVertex middle4 = new SeqVertex("GCAC"); + SeqVertex bottom = new SeqVertex("AA"); + SeqVertex tail1 = new SeqVertex("GC"); + SeqVertex tail2 = new SeqVertex("GC"); + + // just a single vertex + graph.addVertices(pre1); + tests.add(new Object[]{graph.clone(), graph.clone()}); + + // pre1 -> top = pre1 + top + { + graph.addVertices(top); + graph.addEdges(pre1, top); + final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString()); + final SeqGraph expected = new SeqGraph(); + expected.addVertex(pre1_top); + tests.add(new Object[]{graph.clone(), expected.clone()}); + } + + // pre1 -> top -> middle1 = pre1 + top + middle1 + { + graph.addVertices(middle1); + graph.addEdges(top, middle1); + final SeqGraph expected = new SeqGraph(); + final SeqVertex pre1_top_middle1 = new SeqVertex(pre1.getSequenceString() + top.getSequenceString() + middle1.getSequenceString()); + expected.addVertex(pre1_top_middle1); + tests.add(new Object[]{graph.clone(), expected}); + } + + // pre1 -> top -> middle1 & top -> middle2 = pre1 + top -> middle1 & -> middle2 + { + graph.addVertices(middle2); + graph.addEdges(top, middle2); + final SeqGraph expected = new SeqGraph(); + final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString()); + expected.addVertices(pre1_top, middle1, middle2); + expected.addEdges(pre1_top, middle1); + expected.addEdges(pre1_top, middle2); + tests.add(new Object[]{graph.clone(), expected}); + } + + // An actual diamond event to merge! 
+ { + graph.addVertices(bottom); + graph.addEdges(middle1, bottom); + graph.addEdges(middle2, bottom); + final SeqGraph expected = new SeqGraph(); + final SeqVertex pre1_top = new SeqVertex(pre1.getSequenceString() + top.getSequenceString()); + final SeqVertex newMiddle1 = new SeqVertex("G"); + final SeqVertex newMiddle2 = new SeqVertex("T"); + final SeqVertex newBottom = new SeqVertex("C" + bottom.getSequenceString()); + expected.addVertices(pre1_top, newMiddle1, newMiddle2, newBottom); + expected.addEdges(pre1_top, newMiddle1, newBottom); + expected.addEdges(pre1_top, newMiddle2, newBottom); + tests.add(new Object[]{graph.clone(), expected.clone()}); + + graph.addVertices(middle3); + graph.addEdges(top, middle3, bottom); + final SeqVertex newMiddle3 = new SeqVertex("A"); + expected.addVertices(newMiddle3); + expected.addEdges(pre1_top, newMiddle3, newBottom); + tests.add(new Object[]{graph.clone(), expected.clone()}); + + graph.addVertices(middle4); + graph.addEdges(top, middle4, bottom); + final SeqVertex newMiddle4 = new SeqVertex("GCA"); + expected.addVertices(newMiddle4); + expected.addEdges(pre1_top, newMiddle4, newBottom); + tests.add(new Object[]{graph.clone(), expected.clone()}); + } + + { + final SeqGraph all = new SeqGraph(); + all.addVertices(pre1, pre2, top, middle1, middle2, bottom, tail1, tail2); + all.addEdges(pre1, top, middle1, bottom, tail1); + all.addEdges(pre2, top, middle2, bottom, tail2); + + final SeqGraph expected = new SeqGraph(); + final SeqVertex newMiddle1 = new SeqVertex("G"); + final SeqVertex newMiddle2 = new SeqVertex("T"); + final SeqVertex newBottom = new SeqVertex("C" + bottom.getSequenceString()); + expected.addVertices(pre1, pre2, top, newMiddle1, newMiddle2, newBottom, tail1, tail2); + expected.addEdges(pre1, top, newMiddle1, newBottom, tail1); + expected.addEdges(pre2, top, newMiddle2, newBottom, tail2); + tests.add(new Object[]{all.clone(), expected.clone()}); + } + + // test the case where we delete a middle node away 
because the common sequence is all of its sequence + { + final SeqGraph graph2 = new SeqGraph(); + final SeqVertex mytop = new SeqVertex("A"); + final SeqVertex mid1 = new SeqVertex("AC"); + final SeqVertex mid2 = new SeqVertex("C"); + final SeqVertex bot = new SeqVertex("G"); + graph2.addVertices(mytop, mid1, mid2, bot); + graph2.addEdges(mytop, mid1, bot); + graph2.addEdges(mytop, mid2, bot); + + final SeqGraph expected = new SeqGraph(); + final SeqVertex newMid1 = new SeqVertex("A"); + final SeqVertex newBottom = new SeqVertex("CG"); + expected.addVertices(mytop, newMid1, newBottom); + expected.addEdges(mytop, newMid1, newBottom); + expected.addEdges(mytop, newBottom); + tests.add(new Object[]{graph2, expected}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "MergingData", enabled = true) + public void testMerging(final SeqGraph graph, final SeqGraph expected) { + final SeqGraph merged = (SeqGraph)graph.clone(); + merged.simplifyGraph(); + Assert.assertTrue(SeqGraph.graphEquals(merged, expected)); + } }