Fixed bug where dangling tail merging occasionally created a cycle in the graph.

Added unit tests to cover this case.  Delivers PT#66690470.
This commit is contained in:
Eric Banks 2014-03-03 22:42:56 -05:00
parent 4d69af189e
commit b99bf85ec8
2 changed files with 20 additions and 3 deletions

View File

@ -97,7 +97,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
/**
* Class to keep track of the important dangling chain merging data
*/
protected final class DanglingChainMergeHelper {
protected static final class DanglingChainMergeHelper {
final List<MultiDeBruijnVertex> danglingPath, referencePath;
final byte[] danglingPathString, referencePathString;
final Cigar cigar;
@ -222,7 +222,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
if ( numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY )
return false;
// the last element must be an M
// the first element must be an M
if ( requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M )
return false;
@ -263,6 +263,12 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);
// Another edge condition occurs here: if Smith-Waterman places the whole tail into an insertion, then it will try to
// merge back to the LCA, which results in a cycle in the graph. We therefore do not merge in such a case.
if ( refIndexToMerge == 0 )
return 0;
// it's safe to merge now
addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
return 1;

View File

@ -46,6 +46,8 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
import org.broadinstitute.sting.utils.Utils;
@ -77,6 +79,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data
tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch
tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex
tests.add(new Object[]{"AAAAA", "XXXXX", "1M5I", false, -1}); // insertion
return tests.toArray(new Object[][]{});
}
@ -144,7 +147,15 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
}
}
@Test(enabled = true)
/**
 * Checks the case where the alignment of the dangling tail is almost entirely an insertion
 * (cigar "5I1M"): {@code mergeDanglingTail} is expected to return 0 for such a helper.
 */
@Test
public void testWholeTailIsInsertion() {
    // Inputs for the helper: the dangling and reference base strings plus the cigar relating them.
    final byte[] danglingBases = "AXXXXX".getBytes();
    final byte[] referenceBases = "AAAAAA".getBytes();
    final Cigar insertionCigar = new TextCigarCodec().decode("5I1M");

    final ReadThreadingGraph graph = new ReadThreadingGraph(10);
    // The two vertex-path arguments are deliberately passed as null; only the base strings and cigar are supplied.
    final ReadThreadingGraph.DanglingChainMergeHelper helper =
            new ReadThreadingGraph.DanglingChainMergeHelper(null, null, danglingBases, referenceBases, insertionCigar);

    Assert.assertEquals(graph.mergeDanglingTail(helper), 0);
}
@Test
public void testGetBasesForPath() {
final int kmerSize = 4;