diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java index 7b5fd2bbd..d49b63672 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java @@ -143,4 +143,18 @@ public class BaseEdge { return edge2.multiplicity - edge1.multiplicity; } } + + /** + * Add edge to this edge, updating isRef and multiplicity as appropriate + * + * isRef is simply the or of this and edge + * multiplicity is the sum + * + * @param edge the edge to add + */ + public void add(final BaseEdge edge) { + if ( edge == null ) throw new IllegalArgumentException("edge cannot be null"); + this.multiplicity += edge.getMultiplicity(); + this.isRef = this.isRef || edge.isRef(); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java index ec5c99bb1..c77ec4222 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java @@ -47,11 +47,11 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.jgrapht.EdgeFactory; import org.jgrapht.graph.DefaultDirectedGraph; -import org.jgrapht.traverse.DepthFirstIterator; import java.io.File; import java.io.FileNotFoundException; @@ -64,7 +64,7 @@ import java.util.*; * User: rpoplin * Date: 2/6/13 */ - +@Invariant("!this.isAllowingMultipleEdges()") public class BaseGraph extends DefaultDirectedGraph { protected final static Logger logger = Logger.getLogger(BaseGraph.class); private final int kmerSize; @@ -513,4 +513,19 @@ public class BaseGraph extends DefaultDirectedGraph edges = getAllEdges(source, target); + return edges.isEmpty() ? null : edges.iterator().next(); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java index f67815b92..b855390c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java @@ -51,6 +51,7 @@ import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; +import java.io.File; import java.util.*; /** @@ -149,7 +150,7 @@ public class SeqGraph extends BaseGraph { * Perform as many branch simplifications and merging operations as possible on this graph, * modifying it in place. */ - private void mergeBranchingNodes() { + protected void mergeBranchingNodes() { boolean foundNodesToMerge = true; while( foundNodesToMerge ) { foundNodesToMerge = false; @@ -288,61 +289,48 @@ public class SeqGraph extends BaseGraph { final SeqVertex diamondBottom = getDiamondBottom(top); final Set middleVertices = getMiddleVertices(top); - final List verticesToRemove = new LinkedList(); final List edgesToRemove = new LinkedList(); // all of the edges point to the same sink, so it's time to merge final byte[] commonSuffix = commonSuffixOfEdgeTargets(middleVertices); if ( commonSuffix != null ) { - boolean newBottomEdgeIsRef = false; - int newBottomEdgeMultiplicity = 0; - + final BaseEdge botToNewBottom = new BaseEdge(false, 0); + final BaseEdge elimMiddleNodeEdge = new BaseEdge(false, 0); final SeqVertex newBottomV = new SeqVertex(commonSuffix); addVertex(newBottomV); for ( final SeqVertex middle : middleVertices ) { - boolean missingNodeEdgeIsRef = false; - int missingNodeMultiplicity = 0; final SeqVertex withoutSuffix = middle.withoutSuffix(commonSuffix); + final BaseEdge topToMiddleEdge = getEdge(top, middle); + final BaseEdge middleToBottomE = getEdge(middle, diamondBottom); - if ( withoutSuffix != null ) // this node is a deletion + // clip out the two edges, since we'll be replacing them later + edgesToRemove.add(topToMiddleEdge); + edgesToRemove.add(middleToBottomE); + + if ( withoutSuffix != null ) { // this node is a deletion addVertex(withoutSuffix); - - // update all edges from top -> middle to be top -> without suffix - for( final BaseEdge topToMiddleEdge : getAllEdges(top, middle) ) { - edgesToRemove.add(topToMiddleEdge); - missingNodeMultiplicity += topToMiddleEdge.getMultiplicity(); - missingNodeEdgeIsRef = missingNodeEdgeIsRef || topToMiddleEdge.isRef(); - if ( withoutSuffix != null ) // this node is a deletion - addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge.isRef(), topToMiddleEdge.getMultiplicity())); + // update edge from top -> middle to be top -> without suffix + addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge)); + addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE)); + } else { + // this middle node is == the common suffix, wo we're removing the edge + elimMiddleNodeEdge.add(topToMiddleEdge); } - - // reattached prefix to the new bottom V by updating all edges from middleV -> bottom - for ( final BaseEdge middleToBottomE : getAllEdges(middle, diamondBottom) ) { - missingNodeMultiplicity += middleToBottomE.getMultiplicity(); - missingNodeEdgeIsRef = missingNodeEdgeIsRef || middleToBottomE.isRef(); - - if ( withoutSuffix != null ) // this node is a deletion - addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE.isRef(), middleToBottomE.getMultiplicity())); - edgesToRemove.add(middleToBottomE); - - // update the info for the new bottom edge - newBottomEdgeIsRef = newBottomEdgeIsRef || middleToBottomE.isRef(); - newBottomEdgeMultiplicity += middleToBottomE.getMultiplicity(); - } - - if ( withoutSuffix == null ) // add an edge from top to new bottom - addEdge(top, newBottomV, new BaseEdge(missingNodeEdgeIsRef, missingNodeMultiplicity)); - + // include the ref and multi of mid -> bot in our edge from new bot -> bot + botToNewBottom.add(middleToBottomE); verticesToRemove.add(middle); } - addEdge(newBottomV, diamondBottom, new BaseEdge(newBottomEdgeIsRef, newBottomEdgeMultiplicity)); + // add an edge from top to new bottom, because some middle nodes were removed + if ( elimMiddleNodeEdge.getMultiplicity() > 0 ) + addEdge(top, newBottomV, elimMiddleNodeEdge); + + addEdge(newBottomV, diamondBottom, botToNewBottom); removeAllEdges(edgesToRemove); removeAllVertices(verticesToRemove); - return true; } else { return false; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java index c63996d66..83a4f4c50 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java @@ -51,6 +51,7 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -311,4 +312,33 @@ public class SeqGraphUnitTest extends BaseTest { merged.simplifyGraph(); Assert.assertTrue(SeqGraph.graphEquals(merged, expected)); } + + // A -> ACT -> C [non-ref] + // A -> ACT -> C [non-ref] + // A -> ACT -> C [ref] + // + // Should become A -> ACT -> C [ref and non-ref edges] + // + @Test + public void testBubbleSameBasesWithRef() { + final SeqGraph graph = new SeqGraph(); + final SeqVertex top = new SeqVertex("A"); + final SeqVertex mid1 = new SeqVertex("ACT"); + final SeqVertex mid2 = new SeqVertex("ACT"); + final SeqVertex bot = new SeqVertex("C"); + graph.addVertices(top, mid1, mid2, bot); + graph.addEdges(top, mid2, bot); + graph.addEdge(top, mid1, new BaseEdge(true, 1)); + graph.addEdge(mid1, bot, new BaseEdge(true, 1)); + + final SeqGraph expected = new SeqGraph(); + expected.addVertices(top, mid1, bot); + expected.addEdge(top, mid1, new BaseEdge(true, 2)); + expected.addEdge(mid1, bot, new BaseEdge(true, 2)); + + final SeqGraph actual = ((SeqGraph)graph.clone()); + actual.mergeBranchingNodes(); + + Assert.assertTrue(BaseGraph.graphEquals(actual, expected)); + } }