diff --git a/build.xml b/build.xml
index beca6bce0..efefdd438 100644
--- a/build.xml
+++ b/build.xml
@@ -855,8 +855,8 @@
-
-
+
+
diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
index 41cbbe59f..2cfcc19a9 100644
--- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java
@@ -333,28 +333,6 @@ public class IntervalUtils {
throw new UserException.BadArgumentValue("scatterParts", String.format("Only able to write contigs into %d of %d files.", fileIndex + 1, scatterParts.size()));
}
- /**
- * Splits an interval list into multiple sublists.
- * @param locs The genome locs to split.
- * @param splits The stop points for the genome locs returned by splitFixedIntervals.
- * @return A list of lists of genome locs, split according to splits
- */
- public static List> splitIntervalsToSubLists(List locs, List splits) {
- int locIndex = 1;
- int start = 0;
- List> sublists = new ArrayList>(splits.size());
- for (Integer stop: splits) {
- List curList = new ArrayList();
- for (int i = start; i < stop; i++)
- curList.add(locs.get(i));
- start = stop;
- sublists.add(curList);
- }
-
- return sublists;
- }
-
-
/**
* Splits an interval list into multiple files.
* @param fileHeader The sam file header.
@@ -384,39 +362,27 @@ public class IntervalUtils {
public static List> splitFixedIntervals(List locs, int numParts) {
if (locs.size() < numParts)
throw new UserException.BadArgumentValue("scatterParts", String.format("Cannot scatter %d locs into %d parts.", locs.size(), numParts));
+ // Fill parts front-to-back toward an ideal size: even-indexed parts stop once past it, odd-indexed parts stop before the next loc would pass it.
final long locsSize = intervalSize(locs);
- final List splitPoints = new ArrayList();
- addFixedSplit(splitPoints, locs, locsSize, 0, locs.size(), numParts);
- Collections.sort(splitPoints);
- splitPoints.add(locs.size());
- return splitIntervalsToSubLists(locs, splitPoints);
- }
+ final double idealSplitSize = locsSize / numParts;
+ final List> splits = new ArrayList>(numParts);
+ final LinkedList remainingLocs = new LinkedList(locs);
- private static void addFixedSplit(List splitPoints, List locs, long locsSize, int startIndex, int stopIndex, int numParts) {
- if (numParts < 2)
- return;
- int halfParts = (numParts + 1) / 2;
- Pair splitPoint = getFixedSplit(locs, locsSize, startIndex, stopIndex, halfParts, numParts - halfParts);
- int splitIndex = splitPoint.first;
- long splitSize = splitPoint.second;
- splitPoints.add(splitIndex);
- addFixedSplit(splitPoints, locs, splitSize, startIndex, splitIndex, halfParts);
- addFixedSplit(splitPoints, locs, locsSize - splitSize, splitIndex, stopIndex, numParts - halfParts);
- }
+ for ( int i = 0; i < numParts; i++ ) {
+ long splitSize = 0;
+ List split = new ArrayList();
+ while ( ! remainingLocs.isEmpty() ) {
+ final GenomeLoc toAdd = remainingLocs.pop();
+ splitSize += toAdd.size();
+ split.add(toAdd);
+ final long nextEltSize = remainingLocs.isEmpty() ? 0 : remainingLocs.peek().size();
+ if ( i < numParts - 1 && ( remainingLocs.size() <= numParts - i - 1 || splitSize + (i % 2 == 0 ? 0 : nextEltSize) > idealSplitSize ) )
+ break; // the last part never breaks, so it absorbs every remaining loc; earlier parts also stop while one loc is left for each later part
+ }
+ splits.add(split);
+ }
- private static Pair getFixedSplit(List locs, long locsSize, int startIndex, int stopIndex, int minLocs, int maxLocs) {
- int splitIndex = startIndex;
- long splitSize = 0;
- for (int i = 0; i < minLocs; i++) {
- splitSize += locs.get(splitIndex).size();
- splitIndex++;
- }
- long halfSize = locsSize / 2;
- while (splitIndex < (stopIndex - maxLocs) && splitSize < halfSize) {
- splitSize += locs.get(splitIndex).size();
- splitIndex++;
- }
- return new Pair(splitIndex, splitSize);
+ return splits;
}
/**
diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
index bd6bf9591..4809f1b5c 100644
--- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java
@@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.ReferenceSequenceFile;
+import net.sf.picard.util.IntervalUtil;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
@@ -99,19 +100,26 @@ public class IntervalUtilsUnitTest extends BaseTest {
@DataProvider(name = "intervalslicingdata")
public Object[][] createTrees() {
-// new IntervalSlicingTest(1, 0);
-// new IntervalSlicingTest(2, 0.1);
- new IntervalSlicingTest(5, 0.1);
-// new IntervalSlicingTest(10, 0.1);
-// new IntervalSlicingTest(67, 0.1);
-// new IntervalSlicingTest(100, 0.1);
-// new IntervalSlicingTest(500, 0.1);
-// new IntervalSlicingTest(1000, 0.1);
+ new IntervalSlicingTest(1, 0);
+ new IntervalSlicingTest(2, 0.1);
+ new IntervalSlicingTest(3, 0.1);
+ new IntervalSlicingTest(7, 0.1);
+ new IntervalSlicingTest(10, 0.1);
+ new IntervalSlicingTest(31, 0.1);
+ new IntervalSlicingTest(67, 0.1);
+ new IntervalSlicingTest(100, 0.1);
+ new IntervalSlicingTest(127, 0.1);
+ // starts to become a bit less efficient with larger cuts
+ new IntervalSlicingTest(500, 0.5);
+ new IntervalSlicingTest(1000, 1);
+ new IntervalSlicingTest(10000, 10);
return IntervalSlicingTest.getTests(IntervalSlicingTest.class);
}
@Test(dataProvider = "intervalslicingdata")
public void testFixedScatterIntervalsAlgorithm(IntervalSlicingTest test) {
+ Set locsSet = new HashSet(hg19exomeIntervals);
+ Set notFoundSet = new HashSet(hg19exomeIntervals);
List> splits = IntervalUtils.splitFixedIntervals(hg19exomeIntervals, test.parts);
long totalSize = IntervalUtils.intervalSize(hg19exomeIntervals);
@@ -122,501 +130,497 @@ public class IntervalUtilsUnitTest extends BaseTest {
for ( final List split : splits ) {
long splitSize = IntervalUtils.intervalSize(split);
double sigma = (splitSize - idealSplitSize) / (1.0 * idealSplitSize);
- logger.warn(String.format("Split %d size %d ideal %d sigma %.2f", counter, splitSize, idealSplitSize, sigma));
+ //logger.warn(String.format("Split %d size %d ideal %d sigma %.2f", counter, splitSize, idealSplitSize, sigma));
counter++;
sumOfSplitSizes += splitSize;
Assert.assertTrue(Math.abs(sigma) <= test.maxAllowableVariance, String.format("Interval %d (size %d ideal %d) has a variance %.2f outside of the tolerated range %.2f", counter, splitSize, idealSplitSize, sigma, test.maxAllowableVariance));
+
+ for ( final GenomeLoc loc : split ) {
+ Assert.assertTrue(locsSet.contains(loc), "Split location " + loc + " not found in set of input locs");
+ notFoundSet.remove(loc);
+ }
}
- Assert.assertEquals(totalSize, sumOfSplitSizes, "Split intervals don't contain the exact number of bases in the origianl intervals");
+ Assert.assertEquals(sumOfSplitSizes, totalSize, "Split intervals don't contain the exact number of bases in the original intervals");
+ Assert.assertTrue(notFoundSet.isEmpty(), "Not all intervals were present in the split set");
}
-// @Test(expectedExceptions=UserException.class)
-// public void testMergeListsBySetOperatorNoOverlap() {
-// // a couple of lists we'll use for the testing
-// List listEveryTwoFromOne = new ArrayList();
-// List listEveryTwoFromTwo = new ArrayList();
-//
-// // create the two lists we'll use
-// for (int x = 1; x < 101; x++) {
-// if (x % 2 == 0)
-// listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// else
-// listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// }
-//
-// List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
-// Assert.assertEquals(ret.size(), 100);
-// ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
-// Assert.assertEquals(ret.size(), 0);
-// }
-//
-// @Test
-// public void testMergeListsBySetOperatorAllOverlap() {
-// // a couple of lists we'll use for the testing
-// List allSites = new ArrayList();
-// List listEveryTwoFromTwo = new ArrayList();
-//
-// // create the two lists we'll use
-// for (int x = 1; x < 101; x++) {
-// if (x % 2 == 0)
-// listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// }
-//
-// List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
-// Assert.assertEquals(ret.size(), 150);
-// ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
-// Assert.assertEquals(ret.size(), 50);
-// }
-//
-// @Test
-// public void testMergeListsBySetOperator() {
-// // a couple of lists we'll use for the testing
-// List allSites = new ArrayList();
-// List listEveryTwoFromTwo = new ArrayList();
-//
-// // create the two lists we'll use
-// for (int x = 1; x < 101; x++) {
-// if (x % 5 == 0) {
-// listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
-// }
-// }
-//
-// List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
-// Assert.assertEquals(ret.size(), 40);
-// ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
-// Assert.assertEquals(ret.size(), 20);
-// }
-//
-// @Test
-// public void testGetContigLengths() {
-// Map lengths = IntervalUtils.getContigSizes(new File(BaseTest.hg18Reference));
-// Assert.assertEquals((long)lengths.get("chr1"), 247249719);
-// Assert.assertEquals((long)lengths.get("chr2"), 242951149);
-// Assert.assertEquals((long)lengths.get("chr3"), 199501827);
-// Assert.assertEquals((long)lengths.get("chr20"), 62435964);
-// Assert.assertEquals((long)lengths.get("chrX"), 154913754);
-// }
-//
-// @Test
-// public void testParseIntervalArguments() {
-// Assert.assertEquals(getLocs().size(), 45);
-// Assert.assertEquals(getLocs("chr1", "chr2", "chr3").size(), 3);
-// Assert.assertEquals(getLocs("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2").size(), 4);
-// }
-//
-// @Test
-// public void testIsIntervalFile() {
-// Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list"));
-// Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true));
-//
-// List extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard");
-// for (String extension: extensions) {
-// Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." + extension, false), "Tested interval file extension: " + extension);
-// }
-// }
-//
-// @Test(expectedExceptions = UserException.CouldNotReadInputFile.class)
-// public void testMissingIntervalFile() {
-// IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "no_such_intervals.list");
-// }
-//
-// @Test
-// public void testFixedScatterIntervalsBasic() {
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
-//
-// List files = testFiles("basic.", 3, ".intervals");
-//
-// List locs = getLocs("chr1", "chr2", "chr3");
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3);
-// }
-//
-// @Test
-// public void testScatterFixedIntervalsLessFiles() {
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
-// GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
-//
-// List files = testFiles("less.", 3, ".intervals");
-//
-// List locs = getLocs("chr1", "chr2", "chr3", "chr4");
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 2);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3);
-// Assert.assertEquals(locs3.get(1), chr4);
-// }
-//
-// @Test(expectedExceptions=UserException.BadArgumentValue.class)
-// public void testSplitFixedIntervalsMoreFiles() {
-// List files = testFiles("more.", 3, ".intervals");
-// List locs = getLocs("chr1", "chr2");
-// IntervalUtils.splitFixedIntervals(locs, files.size());
-// }
-//
-// @Test(expectedExceptions=UserException.BadArgumentValue.class)
-// public void testScatterFixedIntervalsMoreFiles() {
-// List files = testFiles("more.", 3, ".intervals");
-// List locs = getLocs("chr1", "chr2");
-// List splits = IntervalUtils.splitFixedIntervals(locs, locs.size()); // locs.size() instead of files.size()
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-// }
-// @Test
-// public void testScatterFixedIntervalsStart() {
-// List intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
-// GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
-// GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
-//
-// List files = testFiles("split.", 3, ".intervals");
-//
-// List locs = getLocs(intervals);
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 2);
-//
-// Assert.assertEquals(locs1.get(0), chr1a);
-// Assert.assertEquals(locs2.get(0), chr1b);
-// Assert.assertEquals(locs3.get(0), chr2);
-// Assert.assertEquals(locs3.get(1), chr3);
-// }
-//
-// @Test
-// public void testScatterFixedIntervalsMiddle() {
-// List intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
-// GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
-// GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
-//
-// List files = testFiles("split.", 3, ".intervals");
-//
-// List locs = getLocs(intervals);
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 2);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2a);
-// Assert.assertEquals(locs3.get(0), chr2b);
-// Assert.assertEquals(locs3.get(1), chr3);
-// }
-//
-// @Test
-// public void testScatterFixedIntervalsEnd() {
-// List intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
-// GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
-// GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
-//
-// List files = testFiles("split.", 3, ".intervals");
-//
-// List locs = getLocs(intervals);
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 2);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs1.get(1), chr2);
-// Assert.assertEquals(locs2.get(0), chr3a);
-// Assert.assertEquals(locs3.get(0), chr3b);
-// }
-//
-// @Test
-// public void testScatterFixedIntervalsFile() {
-// List files = testFiles("sg.", 20, ".intervals");
-// List locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(BaseTest.GATKDataLocation + "whole_exome_agilent_designed_120.targets.hg18.chr20.interval_list"), false);
-// List splits = IntervalUtils.splitFixedIntervals(locs, files.size());
-//
-// int[] counts = {
-// 125, 138, 287, 291, 312, 105, 155, 324,
-// 295, 298, 141, 121, 285, 302, 282, 88,
-// 116, 274, 282, 248
-//// 5169, 5573, 10017, 10567, 10551,
-//// 5087, 4908, 10120, 10435, 10399,
-//// 5391, 4735, 10621, 10352, 10654,
-//// 5227, 5256, 10151, 9649, 9825
-// };
-//
-// //String splitCounts = "";
-// for (int lastIndex = 0, i = 0; i < splits.size(); i++) {
-// int splitIndex = splits.get(i);
-// int splitCount = (splitIndex - lastIndex);
-// //splitCounts += ", " + splitCount;
-// lastIndex = splitIndex;
-// Assert.assertEquals(splitCount, counts[i], "Num intervals in split " + i);
-// }
-// //System.out.println(splitCounts.substring(2));
-//
-// IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
-//
-// int locIndex = 0;
-// for (int i = 0; i < files.size(); i++) {
-// String file = files.get(i).toString();
-// List parsedLocs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(file), false);
-// Assert.assertEquals(parsedLocs.size(), counts[i], "Intervals in " + file);
-// for (GenomeLoc parsedLoc: parsedLocs)
-// Assert.assertEquals(parsedLoc, locs.get(locIndex), String.format("Genome loc %d from file %d", locIndex++, i));
-// }
-// Assert.assertEquals(locIndex, locs.size(), "Total number of GenomeLocs");
-// }
-//
-// @Test
-// public void testScatterFixedIntervalsMax() {
-// List files = testFiles("sg.", 85, ".intervals");
-// List splits = IntervalUtils.splitFixedIntervals(hg19ReferenceLocs, files.size());
-// IntervalUtils.scatterFixedIntervals(hg19Header, hg19ReferenceLocs, splits, files);
-//
-// for (int i = 0; i < files.size(); i++) {
-// String file = files.get(i).toString();
-// List parsedLocs = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(file), false);
-// Assert.assertEquals(parsedLocs.size(), 1, "parsedLocs[" + i + "].size()");
-// Assert.assertEquals(parsedLocs.get(0), hg19ReferenceLocs.get(i), "parsedLocs[" + i + "].get()");
-// }
-// }
-//
-// @Test
-// public void testScatterContigIntervalsOrder() {
-// List intervals = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2");
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
-//
-// List files = testFiles("split.", 3, ".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr2);
-// Assert.assertEquals(locs2.get(0), chr1);
-// Assert.assertEquals(locs3.get(0), chr3);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsBasic() {
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
-//
-// List files = testFiles("contig_basic.", 3, ".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3"), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsLessFiles() {
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
-// GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
-//
-// List files = testFiles("contig_less.", 3, ".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3", "chr4"), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 2);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3);
-// Assert.assertEquals(locs3.get(1), chr4);
-// }
-//
-// @Test(expectedExceptions=UserException.BadArgumentValue.class)
-// public void testScatterContigIntervalsMoreFiles() {
-// List files = testFiles("contig_more.", 3, ".intervals");
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2"), files);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsStart() {
-// List intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
-// GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
-// GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
-//
-// List files = testFiles("contig_split_start.", 3, ".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 2);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr1a);
-// Assert.assertEquals(locs1.get(1), chr1b);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsMiddle() {
-// List intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
-// GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
-// GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
-// GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
-//
-// List files = testFiles("contig_split_middle.", 3, ".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 2);
-// Assert.assertEquals(locs3.size(), 1);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2a);
-// Assert.assertEquals(locs2.get(1), chr2b);
-// Assert.assertEquals(locs3.get(0), chr3);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsEnd() {
-// List intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
-// GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
-// GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
-// GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
-// GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
-//
-// List files = testFiles("contig_split_end.", 3 ,".intervals");
-//
-// IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
-//
-// List locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
-// List locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
-// List locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
-//
-// Assert.assertEquals(locs1.size(), 1);
-// Assert.assertEquals(locs2.size(), 1);
-// Assert.assertEquals(locs3.size(), 2);
-//
-// Assert.assertEquals(locs1.get(0), chr1);
-// Assert.assertEquals(locs2.get(0), chr2);
-// Assert.assertEquals(locs3.get(0), chr3a);
-// Assert.assertEquals(locs3.get(1), chr3b);
-// }
-//
-// @Test
-// public void testScatterContigIntervalsMax() {
-// List files = testFiles("sg.", 85, ".intervals");
-// IntervalUtils.scatterContigIntervals(hg19Header, hg19ReferenceLocs, files);
-//
-// for (int i = 0; i < files.size(); i++) {
-// String file = files.get(i).toString();
-// List parsedLocs = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(file), false);
-// Assert.assertEquals(parsedLocs.size(), 1, "parsedLocs[" + i + "].size()");
-// Assert.assertEquals(parsedLocs.get(0), hg19ReferenceLocs.get(i), "parsedLocs[" + i + "].get()");
-// }
-// }
-//
-// private List testFiles(String prefix, int count, String suffix) {
-// ArrayList files = new ArrayList();
-// for (int i = 1; i <= count; i++) {
-// files.add(createTempFile(prefix + i, suffix));
-// }
-// return files;
-// }
-//
-// @DataProvider(name="unmergedIntervals")
-// public Object[][] getUnmergedIntervals() {
-// return new Object[][] {
-// new Object[] {"small_unmerged_picard_intervals.list"},
-// new Object[] {"small_unmerged_gatk_intervals.list"}
-// };
-// }
-//
-// @Test(dataProvider="unmergedIntervals")
-// public void testUnmergedIntervals(String unmergedIntervals) {
-// List locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(validationDataLocation + unmergedIntervals), false);
-// Assert.assertEquals(locs.size(), 2);
-//
-// List merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
-// Assert.assertEquals(merged.size(), 1);
-// }
+ @Test(expectedExceptions=UserException.class)
+ public void testMergeListsBySetOperatorNoOverlap() {
+ // a couple of lists we'll use for the testing
+ List listEveryTwoFromOne = new ArrayList();
+ List listEveryTwoFromTwo = new ArrayList();
+
+ // create the two lists we'll use
+ for (int x = 1; x < 101; x++) {
+ if (x % 2 == 0)
+ listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ else
+ listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ }
+
+ List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
+ Assert.assertEquals(ret.size(), 100);
+ ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
+ Assert.assertEquals(ret.size(), 0);
+ }
+
+ @Test
+ public void testMergeListsBySetOperatorAllOverlap() {
+ // a couple of lists we'll use for the testing
+ List allSites = new ArrayList();
+ List listEveryTwoFromTwo = new ArrayList();
+
+ // create the two lists we'll use
+ for (int x = 1; x < 101; x++) {
+ if (x % 2 == 0)
+ listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ }
+
+ List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
+ Assert.assertEquals(ret.size(), 150);
+ ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
+ Assert.assertEquals(ret.size(), 50);
+ }
+
+ @Test
+ public void testMergeListsBySetOperator() {
+ // a couple of lists we'll use for the testing
+ List allSites = new ArrayList();
+ List listEveryTwoFromTwo = new ArrayList();
+
+ // create the two lists we'll use
+ for (int x = 1; x < 101; x++) {
+ if (x % 5 == 0) {
+ listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
+ }
+ }
+
+ List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
+ Assert.assertEquals(ret.size(), 40);
+ ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
+ Assert.assertEquals(ret.size(), 20);
+ }
+
+    /**
+     * Reads the contig sizes from the hg18 reference file and compares five of them
+     * (chr1, chr2, chr3, chr20, chrX) against hard-coded expected lengths.
+     */
+    @Test
+    public void testGetContigLengths() {
+        final Map contigSizes = IntervalUtils.getContigSizes(new File(BaseTest.hg18Reference));
+
+        // parallel arrays: contig name and the length the test expects for it
+        final String[] contigNames = { "chr1", "chr2", "chr3", "chr20", "chrX" };
+        final long[] expectedLengths = { 247249719L, 242951149L, 199501827L, 62435964L, 154913754L };
+
+        for (int i = 0; i < contigNames.length; i++) {
+            Assert.assertEquals((long) contigSizes.get(contigNames[i]), expectedLengths[i]);
+        }
+    }
+
+    /**
+     * Checks the number of intervals produced by the getLocs helper: 45 for the no-argument
+     * call, 3 for three contig names, and 4 for four explicit ranges.
+     */
+    @Test
+    public void testParseIntervalArguments() {
+        final int defaultCount = getLocs().size();
+        Assert.assertEquals(defaultCount, 45);
+
+        final int contigCount = getLocs("chr1", "chr2", "chr3").size();
+        Assert.assertEquals(contigCount, 3);
+
+        final int rangeCount = getLocs("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2").size();
+        Assert.assertEquals(rangeCount, 4);
+    }
+
+    /**
+     * Checks that isIntervalFile returns true for the empty_intervals.list validation file,
+     * both through the single-argument overload and with the second argument set to true,
+     * and that it returns true for each listed file extension when the second argument is false.
+     */
+    @Test
+    public void testIsIntervalFile() {
+        Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list"));
+        Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true));
+
+        // NOTE(review): the boolean presumably controls an on-disk existence check, since the
+        // names below are passed with false -- confirm against IntervalUtils.isIntervalFile.
+        // The element type is declared explicitly so the for-each below can bind a String.
+        List<String> extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard");
+        for (String extension: extensions) {
+            Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." + extension, false), "Tested interval file extension: " + extension);
+        }
+    }
+
+    /**
+     * Expects UserException.CouldNotReadInputFile when isIntervalFile is asked about
+     * no_such_intervals.list in the validation data location.
+     */
+    @Test(expectedExceptions = UserException.CouldNotReadInputFile.class)
+    public void testMissingIntervalFile() {
+        final String missingPath = BaseTest.validationDataLocation + "no_such_intervals.list";
+        IntervalUtils.isIntervalFile(missingPath);
+    }
+
+    /**
+     * Fixed-split scatter of the chr1, chr2 and chr3 intervals into three files.
+     * Each file is expected to read back as exactly one interval, in input order.
+     */
+    @Test
+    public void testFixedScatterIntervalsBasic() {
+        final GenomeLoc expectedFirst = hg18GenomeLocParser.parseGenomeLoc("chr1");
+        final GenomeLoc expectedSecond = hg18GenomeLocParser.parseGenomeLoc("chr2");
+        final GenomeLoc expectedThird = hg18GenomeLocParser.parseGenomeLoc("chr3");
+
+        final List outputs = testFiles("basic.", 3, ".intervals");
+
+        final List input = getLocs("chr1", "chr2", "chr3");
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, outputs.size()), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedFirst);
+        Assert.assertEquals(part2.get(0), expectedSecond);
+        Assert.assertEquals(part3.get(0), expectedThird);
+    }
+
+    /**
+     * Fixed-split scatter of four intervals (chr1..chr4) into only three files.
+     * The test expects the first file to hold chr1 and chr2, the second chr3, the third chr4.
+     */
+    @Test
+    public void testScatterFixedIntervalsLessFiles() {
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
+        final GenomeLoc expectedChr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
+
+        final List outputs = testFiles("less.", 3, ".intervals");
+
+        final List input = getLocs("chr1", "chr2", "chr3", "chr4");
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, outputs.size()), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 2);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part1.get(1), expectedChr2);
+        Assert.assertEquals(part2.get(0), expectedChr3);
+        Assert.assertEquals(part3.get(0), expectedChr4);
+    }
+
+    /**
+     * Expects UserException.BadArgumentValue when two intervals are split into three parts.
+     */
+    @Test(expectedExceptions=UserException.BadArgumentValue.class)
+    public void testSplitFixedIntervalsMoreFiles() {
+        final List outputs = testFiles("more.", 3, ".intervals");
+        final List input = getLocs("chr1", "chr2");
+        final int requestedParts = outputs.size();
+        IntervalUtils.splitFixedIntervals(input, requestedParts);
+    }
+
+    /**
+     * Expects UserException.BadArgumentValue when a two-part split is scattered to three files.
+     * The split is sized by the number of locs, not by the number of files, so the mismatch
+     * is only seen by scatterFixedIntervals.
+     */
+    @Test(expectedExceptions=UserException.BadArgumentValue.class)
+    public void testScatterFixedIntervalsMoreFiles() {
+        final List outputs = testFiles("more.", 3, ".intervals");
+        final List input = getLocs("chr1", "chr2");
+        final int partCount = input.size();
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, partCount), outputs);
+    }
+    /**
+     * Fixed-split scatter of four small intervals, two of them on chr1, into three files.
+     * The test expects chr1:1-2 alone, chr1:4-5 alone, then chr2:1-1 together with chr3:2-2.
+     */
+    @Test
+    public void testScatterFixedIntervalsStart() {
+        final List intervalStrings = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
+        final GenomeLoc expectedChr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
+        final GenomeLoc expectedChr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
+
+        final List outputs = testFiles("split.", 3, ".intervals");
+
+        final List input = getLocs(intervalStrings);
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, outputs.size()), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 2);
+
+        Assert.assertEquals(part1.get(0), expectedChr1a);
+        Assert.assertEquals(part2.get(0), expectedChr1b);
+        Assert.assertEquals(part3.get(0), expectedChr2);
+        Assert.assertEquals(part3.get(1), expectedChr3);
+    }
+
+    /**
+     * Fixed-split scatter of four small intervals, two of them on chr2, into three files.
+     * The test expects chr1:1-1 alone, chr2:1-2 alone, then chr2:4-5 together with chr3:2-2.
+     */
+    @Test
+    public void testScatterFixedIntervalsMiddle() {
+        final List intervalStrings = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
+        final GenomeLoc expectedChr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
+        final GenomeLoc expectedChr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
+
+        final List outputs = testFiles("split.", 3, ".intervals");
+
+        final List input = getLocs(intervalStrings);
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, outputs.size()), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 2);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part2.get(0), expectedChr2a);
+        Assert.assertEquals(part3.get(0), expectedChr2b);
+        Assert.assertEquals(part3.get(1), expectedChr3);
+    }
+
+    /**
+     * Fixed-split scatter of four small intervals, two of them on chr3, into three files.
+     * The test expects chr1:1-1 together with chr2:2-2, then chr3:1-2 alone, then chr3:4-5 alone.
+     */
+    @Test
+    public void testScatterFixedIntervalsEnd() {
+        final List intervalStrings = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
+        final GenomeLoc expectedChr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
+        final GenomeLoc expectedChr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
+
+        final List outputs = testFiles("split.", 3, ".intervals");
+
+        final List input = getLocs(intervalStrings);
+        IntervalUtils.scatterFixedIntervals(hg18Header, IntervalUtils.splitFixedIntervals(input, outputs.size()), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 2);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part1.get(1), expectedChr2);
+        Assert.assertEquals(part2.get(0), expectedChr3a);
+        Assert.assertEquals(part3.get(0), expectedChr3b);
+    }
+
+    /**
+     * Splits the hg18 chr20 exome target interval list into 20 parts and checks the number of
+     * intervals in each part against known counts. The parts are then scattered to files, and
+     * the test verifies that reading the files back in order reproduces the original
+     * intervals one for one.
+     */
+    @Test
+    public void testScatterFixedIntervalsFile() {
+        List<File> files = testFiles("sg.", 20, ".intervals");
+        List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(BaseTest.GATKDataLocation + "whole_exome_agilent_designed_120.targets.hg18.chr20.interval_list"), false);
+        List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
+
+        // expected number of intervals in each split, in split order
+        int[] counts = {
+                125, 138, 287, 291, 312, 105, 155, 324,
+                295, 298, 141, 121, 285, 302, 282, 88,
+                116, 274, 282, 248
+        };
+
+        for (int i = 0; i < splits.size(); i++) {
+            long splitCount = splits.get(i).size();
+            Assert.assertEquals(splitCount, counts[i], "Num intervals in split " + i);
+        }
+
+        IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
+
+        // running index into the original locs, advanced once per loc read back from any file
+        int locIndex = 0;
+        for (int i = 0; i < files.size(); i++) {
+            String file = files.get(i).toString();
+            List<GenomeLoc> parsedLocs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(file), false);
+            Assert.assertEquals(parsedLocs.size(), counts[i], "Intervals in " + file);
+            for (GenomeLoc parsedLoc: parsedLocs) {
+                Assert.assertEquals(parsedLoc, locs.get(locIndex), String.format("Genome loc %d from file %d", locIndex, i));
+                // increment kept out of the message argument so the side effect is visible
+                locIndex++;
+            }
+        }
+        Assert.assertEquals(locIndex, locs.size(), "Total number of GenomeLocs");
+    }
+
+    /**
+     * Fixed-split scatter of hg19ReferenceLocs into 85 files. Each file is expected to read
+     * back as a single interval equal to the reference loc at the same index.
+     */
+    @Test
+    public void testScatterFixedIntervalsMax() {
+        final List outputs = testFiles("sg.", 85, ".intervals");
+        IntervalUtils.scatterFixedIntervals(hg19Header, IntervalUtils.splitFixedIntervals(hg19ReferenceLocs, outputs.size()), outputs);
+
+        for (int part = 0; part < outputs.size(); part++) {
+            final String path = outputs.get(part).toString();
+            final List readBack = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(path), false);
+            Assert.assertEquals(readBack.size(), 1, "parsedLocs[" + part + "].size()");
+            Assert.assertEquals(readBack.get(0), hg19ReferenceLocs.get(part), "parsedLocs[" + part + "].get()");
+        }
+    }
+
+    /**
+     * Contig scatter of three intervals given in the order chr2, chr1, chr3.
+     * The three files are expected to hold chr2:1-1, chr1:1-1 and chr3:2-2 respectively.
+     */
+    @Test
+    public void testScatterContigIntervalsOrder() {
+        final List intervalStrings = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2");
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
+
+        final List outputs = testFiles("split.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervalStrings), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedChr2);
+        Assert.assertEquals(part2.get(0), expectedChr1);
+        Assert.assertEquals(part3.get(0), expectedChr3);
+    }
+
+    /**
+     * Contig scatter of the chr1, chr2 and chr3 intervals into three files.
+     * Each file is expected to read back as exactly one interval, in input order.
+     */
+    @Test
+    public void testScatterContigIntervalsBasic() {
+        final GenomeLoc expectedFirst = hg18GenomeLocParser.parseGenomeLoc("chr1");
+        final GenomeLoc expectedSecond = hg18GenomeLocParser.parseGenomeLoc("chr2");
+        final GenomeLoc expectedThird = hg18GenomeLocParser.parseGenomeLoc("chr3");
+
+        final List outputs = testFiles("contig_basic.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3"), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedFirst);
+        Assert.assertEquals(part2.get(0), expectedSecond);
+        Assert.assertEquals(part3.get(0), expectedThird);
+    }
+
+    /**
+     * Contig scatter of four intervals (chr1..chr4) into only three files.
+     * The test expects chr1 alone, chr2 alone, then chr3 together with chr4.
+     */
+    @Test
+    public void testScatterContigIntervalsLessFiles() {
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
+        final GenomeLoc expectedChr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
+
+        final List outputs = testFiles("contig_less.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3", "chr4"), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 2);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part2.get(0), expectedChr2);
+        Assert.assertEquals(part3.get(0), expectedChr3);
+        Assert.assertEquals(part3.get(1), expectedChr4);
+    }
+
+    /**
+     * Expects UserException.BadArgumentValue when two intervals are contig-scattered to three files.
+     */
+    @Test(expectedExceptions=UserException.BadArgumentValue.class)
+    public void testScatterContigIntervalsMoreFiles() {
+        final List outputs = testFiles("contig_more.", 3, ".intervals");
+        final List input = getLocs("chr1", "chr2");
+        IntervalUtils.scatterContigIntervals(hg18Header, input, outputs);
+    }
+
+    /**
+     * Contig scatter of four small intervals, two of them on chr1, into three files.
+     * The test expects both chr1 intervals in the first file, then chr2:1-1, then chr3:2-2.
+     */
+    @Test
+    public void testScatterContigIntervalsStart() {
+        final List intervalStrings = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
+        final GenomeLoc expectedChr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
+        final GenomeLoc expectedChr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
+
+        final List outputs = testFiles("contig_split_start.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervalStrings), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 2);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedChr1a);
+        Assert.assertEquals(part1.get(1), expectedChr1b);
+        Assert.assertEquals(part2.get(0), expectedChr2);
+        Assert.assertEquals(part3.get(0), expectedChr3);
+    }
+
+    /**
+     * Contig scatter of four small intervals, two of them on chr2, into three files.
+     * The test expects chr1:1-1 alone, both chr2 intervals in the second file, then chr3:2-2.
+     */
+    @Test
+    public void testScatterContigIntervalsMiddle() {
+        final List intervalStrings = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
+        final GenomeLoc expectedChr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
+        final GenomeLoc expectedChr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
+        final GenomeLoc expectedChr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
+
+        final List outputs = testFiles("contig_split_middle.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervalStrings), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 2);
+        Assert.assertEquals(part3.size(), 1);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part2.get(0), expectedChr2a);
+        Assert.assertEquals(part2.get(1), expectedChr2b);
+        Assert.assertEquals(part3.get(0), expectedChr3);
+    }
+
+    /**
+     * Contig scatter of four small intervals, two of them on chr3, into three files.
+     * The test expects chr1:1-1 alone, chr2:2-2 alone, then both chr3 intervals in the third file.
+     */
+    @Test
+    public void testScatterContigIntervalsEnd() {
+        final List intervalStrings = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
+        final GenomeLoc expectedChr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
+        final GenomeLoc expectedChr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
+        final GenomeLoc expectedChr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
+        final GenomeLoc expectedChr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
+
+        final List outputs = testFiles("contig_split_end.", 3, ".intervals");
+
+        IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervalStrings), outputs);
+
+        // read each scatter file back in
+        final List part1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(0).toString()), false);
+        final List part2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(1).toString()), false);
+        final List part3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(outputs.get(2).toString()), false);
+
+        Assert.assertEquals(part1.size(), 1);
+        Assert.assertEquals(part2.size(), 1);
+        Assert.assertEquals(part3.size(), 2);
+
+        Assert.assertEquals(part1.get(0), expectedChr1);
+        Assert.assertEquals(part2.get(0), expectedChr2);
+        Assert.assertEquals(part3.get(0), expectedChr3a);
+        Assert.assertEquals(part3.get(1), expectedChr3b);
+    }
+
+    /**
+     * Contig scatter of hg19ReferenceLocs into 85 files. Each file is expected to read back
+     * as a single interval equal to the reference loc at the same index.
+     */
+    @Test
+    public void testScatterContigIntervalsMax() {
+        final List outputs = testFiles("sg.", 85, ".intervals");
+        IntervalUtils.scatterContigIntervals(hg19Header, hg19ReferenceLocs, outputs);
+
+        for (int part = 0; part < outputs.size(); part++) {
+            final String path = outputs.get(part).toString();
+            final List readBack = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(path), false);
+            Assert.assertEquals(readBack.size(), 1, "parsedLocs[" + part + "].size()");
+            Assert.assertEquals(readBack.get(0), hg19ReferenceLocs.get(part), "parsedLocs[" + part + "].get()");
+        }
+    }
+
+    /**
+     * Creates {@code count} temp files through createTempFile, named with the prefix followed
+     * by a 1-based index and the given suffix.
+     *
+     * @param prefix leading part of each temp file name; the index is appended to it
+     * @param count  number of temp files to create
+     * @param suffix suffix passed to createTempFile for every file
+     * @return the created files, in creation order
+     */
+    private List testFiles(String prefix, int count, String suffix) {
+        final List created = new ArrayList();
+        int index = 1;
+        while (index <= count) {
+            created.add(createTempFile(prefix + index, suffix));
+            index++;
+        }
+        return created;
+    }
+
+    /**
+     * Data provider for the "unmergedIntervals" tests: one row per interval list file name.
+     *
+     * @return rows each holding a single file name string
+     */
+    @DataProvider(name="unmergedIntervals")
+    public Object[][] getUnmergedIntervals() {
+        final String[] fileNames = {
+                "small_unmerged_picard_intervals.list",
+                "small_unmerged_gatk_intervals.list"
+        };
+
+        // wrap each file name in its own single-element row
+        final Object[][] rows = new Object[fileNames.length][];
+        for (int i = 0; i < fileNames.length; i++) {
+            rows[i] = new Object[] { fileNames[i] };
+        }
+        return rows;
+    }
+
+    /**
+     * Parses the given interval file from the validation data location, expecting two
+     * intervals, then merges them with IntervalMergingRule.ALL and expects a single interval.
+     *
+     * @param unmergedIntervals file name supplied by the "unmergedIntervals" data provider
+     */
+    @Test(dataProvider="unmergedIntervals")
+    public void testUnmergedIntervals(String unmergedIntervals) {
+        final String intervalPath = validationDataLocation + unmergedIntervals;
+        final List parsed = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(intervalPath), false);
+        Assert.assertEquals(parsed.size(), 2);
+
+        final List collapsed = IntervalUtils.mergeIntervalLocations(parsed, IntervalMergingRule.ALL);
+        Assert.assertEquals(collapsed.size(), 1);
+    }
}