Fixed bug where dangling tail merging occasionally created a cycle in the graph.

Added unit tests to cover this case.  Delivers PT#66690470.
This commit is contained in:
Eric Banks 2014-03-03 22:42:56 -05:00
parent 4d69af189e
commit b99bf85ec8
2 changed files with 20 additions and 3 deletions

View File

@ -97,7 +97,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
/** /**
* Class to keep track of the important dangling chain merging data * Class to keep track of the important dangling chain merging data
*/ */
protected final class DanglingChainMergeHelper { protected static final class DanglingChainMergeHelper {
final List<MultiDeBruijnVertex> danglingPath, referencePath; final List<MultiDeBruijnVertex> danglingPath, referencePath;
final byte[] danglingPathString, referencePathString; final byte[] danglingPathString, referencePathString;
final Cigar cigar; final Cigar cigar;
@ -222,7 +222,7 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
if ( numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY ) if ( numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY )
return false; return false;
// the last element must be an M // the first element must be an M
if ( requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M ) if ( requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M )
return false; return false;
@ -263,6 +263,12 @@ public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnV
final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1); final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0); final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);
// another edge condition occurs here: if Smith-Waterman places the whole tail into an insertion then it will try to
// merge back to the LCA, which results in a cycle in the graph. So we do not want to merge in such a case.
if ( refIndexToMerge == 0 )
return 0;
// it's safe to merge now
addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1)); addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));
return 1; return 1;

View File

@ -46,6 +46,8 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading; package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
import net.sf.samtools.Cigar;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
@ -77,6 +79,7 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data
tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch
tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex
tests.add(new Object[]{"AAAAA", "XXXXX", "1M5I", false, -1}); // insertion
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -144,7 +147,15 @@ public class DanglingChainMergingGraphUnitTest extends BaseTest {
} }
} }
@Test(enabled = true) @Test
public void testWholeTailIsInsertion() {
    // Regression test: when the alignment puts the whole tail into an insertion,
    // mergeDanglingTail must decline to merge (return 0) rather than add an edge
    // that would create a cycle in the graph.
    final ReadThreadingGraph graph = new ReadThreadingGraph(10);

    // Presumably the dangling-path and reference-path base strings, in that order
    // (matches the helper's field order) -- confirm against the constructor.
    final byte[] danglingBases = "AXXXXX".getBytes();
    final byte[] referenceBases = "AAAAAA".getBytes();

    // Five inserted bases followed by a single match.
    final Cigar wholeTailInsertion = new TextCigarCodec().decode("5I1M");

    // The vertex paths are passed as null, exactly as before.
    final ReadThreadingGraph.DanglingChainMergeHelper helper =
            new ReadThreadingGraph.DanglingChainMergeHelper(null, null, danglingBases, referenceBases, wholeTailInsertion);

    final int numMerged = graph.mergeDanglingTail(helper);
    Assert.assertEquals(numMerged, 0);
}
@Test
public void testGetBasesForPath() { public void testGetBasesForPath() {
final int kmerSize = 4; final int kmerSize = 4;