From 6d7d21ca47a35b9925db55401464a3e7d86d9418 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 20 Mar 2013 15:57:10 -0400 Subject: [PATCH] Bugfix for incorrect branch diamond merging algorithm -- Previous version was just incorrectly accumulating information about nodes that were completely eliminated by the common suffix, so we were dropping some reference connections between vertices. Fixed. In the process simplified the entire algorithm and codebase -- Resolves https://jira.broadinstitute.org/browse/GSA-884 --- .../walkers/haplotypecaller/BaseEdge.java | 14 +++++ .../walkers/haplotypecaller/BaseGraph.java | 19 +++++- .../walkers/haplotypecaller/SeqGraph.java | 60 ++++++++----------- .../haplotypecaller/SeqGraphUnitTest.java | 30 ++++++++++ 4 files changed, 85 insertions(+), 38 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java index 7b5fd2bbd..d49b63672 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseEdge.java @@ -143,4 +143,18 @@ public class BaseEdge { return edge2.multiplicity - edge1.multiplicity; } } + + /** + * Add edge to this edge, updating isRef and multiplicity as appropriate + * + * isRef is simply the or of this and edge + * multiplicity is the sum + * + * @param edge the edge to add + */ + public void add(final BaseEdge edge) { + if ( edge == null ) throw new IllegalArgumentException("edge cannot be null"); + this.multiplicity += edge.getMultiplicity(); + this.isRef = this.isRef || edge.isRef(); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java index ec5c99bb1..c77ec4222 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/BaseGraph.java @@ -47,11 +47,11 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.jgrapht.EdgeFactory; import org.jgrapht.graph.DefaultDirectedGraph; -import org.jgrapht.traverse.DepthFirstIterator; import java.io.File; import java.io.FileNotFoundException; @@ -64,7 +64,7 @@ import java.util.*; * User: rpoplin * Date: 2/6/13 */ - +@Invariant("!this.isAllowingMultipleEdges()") public class BaseGraph extends DefaultDirectedGraph { protected final static Logger logger = Logger.getLogger(BaseGraph.class); private final int kmerSize; @@ -513,4 +513,19 @@ public class BaseGraph extends DefaultDirectedGraph edges = getAllEdges(source, target); + return edges.isEmpty() ? null : edges.iterator().next(); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java index f67815b92..b855390c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraph.java @@ -51,6 +51,7 @@ import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; +import java.io.File; import java.util.*; /** @@ -149,7 +150,7 @@ public class SeqGraph extends BaseGraph { * Perform as many branch simplifications and merging operations as possible on this graph, * modifying it in place. */ - private void mergeBranchingNodes() { + protected void mergeBranchingNodes() { boolean foundNodesToMerge = true; while( foundNodesToMerge ) { foundNodesToMerge = false; @@ -288,61 +289,48 @@ public class SeqGraph extends BaseGraph { final SeqVertex diamondBottom = getDiamondBottom(top); final Set middleVertices = getMiddleVertices(top); - final List verticesToRemove = new LinkedList(); final List edgesToRemove = new LinkedList(); // all of the edges point to the same sink, so it's time to merge final byte[] commonSuffix = commonSuffixOfEdgeTargets(middleVertices); if ( commonSuffix != null ) { - boolean newBottomEdgeIsRef = false; - int newBottomEdgeMultiplicity = 0; - + final BaseEdge botToNewBottom = new BaseEdge(false, 0); + final BaseEdge elimMiddleNodeEdge = new BaseEdge(false, 0); final SeqVertex newBottomV = new SeqVertex(commonSuffix); addVertex(newBottomV); for ( final SeqVertex middle : middleVertices ) { - boolean missingNodeEdgeIsRef = false; - int missingNodeMultiplicity = 0; final SeqVertex withoutSuffix = middle.withoutSuffix(commonSuffix); + final BaseEdge topToMiddleEdge = getEdge(top, middle); + final BaseEdge middleToBottomE = getEdge(middle, diamondBottom); - if ( withoutSuffix != null ) // this node is a deletion + // clip out the two edges, since we'll be replacing them later + edgesToRemove.add(topToMiddleEdge); + edgesToRemove.add(middleToBottomE); + + if ( withoutSuffix != null ) { // this node is a deletion addVertex(withoutSuffix); - - // update all edges from top -> middle to be top -> without suffix - for( final BaseEdge topToMiddleEdge : getAllEdges(top, middle) ) { - edgesToRemove.add(topToMiddleEdge); - missingNodeMultiplicity += topToMiddleEdge.getMultiplicity(); - missingNodeEdgeIsRef = missingNodeEdgeIsRef || topToMiddleEdge.isRef(); - if ( withoutSuffix != null ) // this node is a deletion - addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge.isRef(), topToMiddleEdge.getMultiplicity())); + // update edge from top -> middle to be top -> without suffix + addEdge(top, withoutSuffix, new BaseEdge(topToMiddleEdge)); + addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE)); + } else { + // this middle node is == the common suffix, wo we're removing the edge + elimMiddleNodeEdge.add(topToMiddleEdge); } - - // reattached prefix to the new bottom V by updating all edges from middleV -> bottom - for ( final BaseEdge middleToBottomE : getAllEdges(middle, diamondBottom) ) { - missingNodeMultiplicity += middleToBottomE.getMultiplicity(); - missingNodeEdgeIsRef = missingNodeEdgeIsRef || middleToBottomE.isRef(); - - if ( withoutSuffix != null ) // this node is a deletion - addEdge(withoutSuffix, newBottomV, new BaseEdge(middleToBottomE.isRef(), middleToBottomE.getMultiplicity())); - edgesToRemove.add(middleToBottomE); - - // update the info for the new bottom edge - newBottomEdgeIsRef = newBottomEdgeIsRef || middleToBottomE.isRef(); - newBottomEdgeMultiplicity += middleToBottomE.getMultiplicity(); - } - - if ( withoutSuffix == null ) // add an edge from top to new bottom - addEdge(top, newBottomV, new BaseEdge(missingNodeEdgeIsRef, missingNodeMultiplicity)); - + // include the ref and multi of mid -> bot in our edge from new bot -> bot + botToNewBottom.add(middleToBottomE); verticesToRemove.add(middle); } - addEdge(newBottomV, diamondBottom, new BaseEdge(newBottomEdgeIsRef, newBottomEdgeMultiplicity)); + // add an edge from top to new bottom, because some middle nodes were removed + if ( elimMiddleNodeEdge.getMultiplicity() > 0 ) + addEdge(top, newBottomV, elimMiddleNodeEdge); + + addEdge(newBottomV, diamondBottom, botToNewBottom); removeAllEdges(edgesToRemove); removeAllVertices(verticesToRemove); - return true; } else { return false; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java index c63996d66..83a4f4c50 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SeqGraphUnitTest.java @@ -51,6 +51,7 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -311,4 +312,33 @@ public class SeqGraphUnitTest extends BaseTest { merged.simplifyGraph(); Assert.assertTrue(SeqGraph.graphEquals(merged, expected)); } + + // A -> ACT -> C [non-ref] + // A -> ACT -> C [non-ref] + // A -> ACT -> C [ref] + // + // Should become A -> ACT -> C [ref and non-ref edges] + // + @Test + public void testBubbleSameBasesWithRef() { + final SeqGraph graph = new SeqGraph(); + final SeqVertex top = new SeqVertex("A"); + final SeqVertex mid1 = new SeqVertex("ACT"); + final SeqVertex mid2 = new SeqVertex("ACT"); + final SeqVertex bot = new SeqVertex("C"); + graph.addVertices(top, mid1, mid2, bot); + graph.addEdges(top, mid2, bot); + graph.addEdge(top, mid1, new BaseEdge(true, 1)); + graph.addEdge(mid1, bot, new BaseEdge(true, 1)); + + final SeqGraph expected = new SeqGraph(); + expected.addVertices(top, mid1, bot); + expected.addEdge(top, mid1, new BaseEdge(true, 2)); + expected.addEdge(mid1, bot, new BaseEdge(true, 2)); + + final SeqGraph actual = ((SeqGraph)graph.clone()); + actual.mergeBranchingNodes(); + + Assert.assertTrue(BaseGraph.graphEquals(actual, expected)); + } }