Intervals: fix bug where we could fail to find the intersection of unsorted/missorted interval lists
- The algorithm for finding the intersection of two sets of intervals relies on the intervals within each set being sorted, but the engine was not sorting the intervals before attempting to find the intersection.
- The result was that if one or both interval lists were unsorted or lexicographically sorted, we would often fail to find the intersection correctly.
- Now the IntervalBinding sorts all sets of intervals before returning them, solving the problem.
- Added an integration test for this case.

GSA-909 #resolve
This commit is contained in:
parent
791c386972
commit
5baf906c28
|
|
@ -98,6 +98,7 @@ public final class IntervalBinding<T extends Feature> {
|
|||
intervals = IntervalUtils.parseIntervalArguments(genomeLocParser, stringIntervals);
|
||||
}
|
||||
|
||||
Collections.sort(intervals);
|
||||
return intervals;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -285,4 +285,20 @@ public class IntervalIntegrationTest extends WalkerTest {
|
|||
Arrays.asList(md5));
|
||||
executeTest("testSymbolicAlleles", spec);
|
||||
}
|
||||
|
||||
/**
 * Integration test covering the intersection of interval lists when one of
 * them is supplied in lexicographic rather than sorted order.
 *
 * Runs CountLoci over NA12878.4.snippet.bam with two -L arguments (the file
 * lexicographicallySortedIntervals.bed and the interval string "4"), combined
 * with -isr INTERSECTION, and declares a single output file plus one md5 string.
 *
 * NOTE(review): the md5 is presumably the checksum that executeTest compares
 * the output file against -- confirm against WalkerTest.
 */
@Test
|
||||
public void testIntersectionOfLexicographicallySortedIntervals() {
|
||||
final String md5 = "18be9375e5a753f766616a51eb6131f0";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
" -T CountLoci" +
|
||||
" -I " + privateTestDir + "NA12878.4.snippet.bam" +
|
||||
" -R " + b37KGReference +
|
||||
" -L " + privateTestDir + "lexicographicallySortedIntervals.bed" +
|
||||
" -L 4" +
|
||||
" -isr INTERSECTION" +
|
||||
" -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testIntersectionOfLexicographicallySortedIntervals", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue