2010-11-13 04:14:28 +08:00
|
|
|
package org.broadinstitute.sting.utils.interval;
|
|
|
|
|
|
|
|
|
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
2011-04-26 08:06:00 +08:00
|
|
|
import net.sf.samtools.SAMFileHeader;
|
2010-11-13 04:14:28 +08:00
|
|
|
import org.broadinstitute.sting.BaseTest;
|
2011-04-26 08:06:00 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
2010-11-13 04:14:28 +08:00
|
|
|
import org.testng.Assert;
|
|
|
|
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2010-12-06 02:29:39 +08:00
|
|
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
import org.testng.annotations.BeforeClass;
|
2011-04-07 05:12:05 +08:00
|
|
|
import org.testng.annotations.DataProvider;
|
2010-11-13 04:14:28 +08:00
|
|
|
import org.testng.annotations.Test;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
2011-01-03 10:17:25 +08:00
|
|
|
import java.io.FileNotFoundException;
|
2011-04-21 03:22:21 +08:00
|
|
|
import java.util.*;
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Unit tests for the interval utility methods in IntervalUtils.
|
|
|
|
|
*/
|
|
|
|
|
public class IntervalUtilsUnitTest extends BaseTest {
|
|
|
|
|
// used to seed the genome loc parser with a sequence dictionary
|
2011-05-05 04:49:35 +08:00
|
|
|
private SAMFileHeader hg18Header;
|
|
|
|
|
private GenomeLocParser hg18GenomeLocParser;
|
|
|
|
|
private List<GenomeLoc> hg18ReferenceLocs;
|
|
|
|
|
private SAMFileHeader hg19Header;
|
|
|
|
|
private GenomeLocParser hg19GenomeLocParser;
|
|
|
|
|
private List<GenomeLoc> hg19ReferenceLocs;
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
@BeforeClass
|
|
|
|
|
public void init() {
|
2011-05-05 04:49:35 +08:00
|
|
|
File hg18Ref = new File(BaseTest.hg18Reference);
|
2011-01-03 10:17:25 +08:00
|
|
|
try {
|
2011-05-05 04:49:35 +08:00
|
|
|
ReferenceDataSource referenceDataSource = new ReferenceDataSource(hg18Ref);
|
|
|
|
|
hg18Header = new SAMFileHeader();
|
|
|
|
|
hg18Header.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
|
|
|
|
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(hg18Ref);
|
|
|
|
|
hg18GenomeLocParser = new GenomeLocParser(seq);
|
|
|
|
|
hg18ReferenceLocs = Collections.unmodifiableList(GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()).toList()) ;
|
2011-01-03 10:17:25 +08:00
|
|
|
}
|
|
|
|
|
catch(FileNotFoundException ex) {
|
2011-05-05 04:49:35 +08:00
|
|
|
throw new UserException.CouldNotReadInputFile(hg18Ref,ex);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
File hg19Ref = new File(BaseTest.hg19Reference);
|
|
|
|
|
try {
|
|
|
|
|
ReferenceDataSource referenceDataSource = new ReferenceDataSource(hg19Ref);
|
|
|
|
|
hg19Header = new SAMFileHeader();
|
|
|
|
|
hg19Header.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
|
|
|
|
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(hg19Ref);
|
|
|
|
|
hg19GenomeLocParser = new GenomeLocParser(seq);
|
|
|
|
|
hg19ReferenceLocs = Collections.unmodifiableList(GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()).toList()) ;
|
|
|
|
|
}
|
|
|
|
|
catch(FileNotFoundException ex) {
|
|
|
|
|
throw new UserException.CouldNotReadInputFile(hg19Ref,ex);
|
2011-01-03 10:17:25 +08:00
|
|
|
}
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
2010-11-30 13:49:20 +08:00
|
|
|
@Test(expectedExceptions=UserException.class)
|
2010-11-13 04:14:28 +08:00
|
|
|
public void testMergeListsBySetOperatorNoOverlap() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromOne = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 2 == 0)
|
2011-05-05 04:49:35 +08:00
|
|
|
listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
2010-11-13 04:14:28 +08:00
|
|
|
else
|
2011-05-05 04:49:35 +08:00
|
|
|
listEveryTwoFromOne.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 100);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testMergeListsBySetOperatorAllOverlap() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 2 == 0)
|
2011-05-05 04:49:35 +08:00
|
|
|
listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 150);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 50);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testMergeListsBySetOperator() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 5 == 0) {
|
2011-05-05 04:49:35 +08:00
|
|
|
listEveryTwoFromTwo.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
allSites.add(hg18GenomeLocParser.createGenomeLoc("chr1",x,x));
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 40);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 20);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testGetContigLengths() {
|
|
|
|
|
Map<String, Long> lengths = IntervalUtils.getContigSizes(new File(BaseTest.hg18Reference));
|
|
|
|
|
Assert.assertEquals((long)lengths.get("chr1"), 247249719);
|
|
|
|
|
Assert.assertEquals((long)lengths.get("chr2"), 242951149);
|
|
|
|
|
Assert.assertEquals((long)lengths.get("chr3"), 199501827);
|
|
|
|
|
Assert.assertEquals((long)lengths.get("chr20"), 62435964);
|
|
|
|
|
Assert.assertEquals((long)lengths.get("chrX"), 154913754);
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
private List<GenomeLoc> getLocs(String... intervals) {
|
|
|
|
|
return getLocs(Arrays.asList(intervals));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private List<GenomeLoc> getLocs(List<String> intervals) {
|
|
|
|
|
if (intervals.size() == 0)
|
2011-05-05 04:49:35 +08:00
|
|
|
return hg18ReferenceLocs;
|
2011-04-26 08:06:00 +08:00
|
|
|
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
|
|
|
|
for (String interval: intervals)
|
2011-05-05 04:49:35 +08:00
|
|
|
locs.add(hg18GenomeLocParser.parseGenomeInterval(interval));
|
2011-04-26 08:06:00 +08:00
|
|
|
return locs;
|
2011-04-21 03:22:21 +08:00
|
|
|
}
|
|
|
|
|
|
2011-01-29 07:33:58 +08:00
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testParseIntervalArguments() {
|
|
|
|
|
Assert.assertEquals(getLocs().size(), 45);
|
|
|
|
|
Assert.assertEquals(getLocs("chr1", "chr2", "chr3").size(), 3);
|
|
|
|
|
Assert.assertEquals(getLocs("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2").size(), 4);
|
|
|
|
|
}
|
|
|
|
|
|
2011-02-01 02:07:34 +08:00
|
|
|
@Test
|
|
|
|
|
public void testIsIntervalFile() {
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list"));
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true));
|
|
|
|
|
|
|
|
|
|
List<String> extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard");
|
|
|
|
|
for (String extension: extensions) {
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." + extension, false), "Tested interval file extension: " + extension);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions = UserException.CouldNotReadInputFile.class)
|
|
|
|
|
public void testMissingIntervalFile() {
|
|
|
|
|
IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "no_such_intervals.list");
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testFixedScatterIntervalsBasic() {
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("basic.", 3, ".intervals");
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
List<GenomeLoc> locs = getLocs("chr1", "chr2", "chr3");
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterFixedIntervalsLessFiles() {
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("less.", 3, ".intervals");
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
List<GenomeLoc> locs = getLocs("chr1", "chr2", "chr3", "chr4");
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
2010-11-13 04:14:28 +08:00
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr4);
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions=UserException.BadArgumentValue.class)
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testSplitFixedIntervalsMoreFiles() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<File> files = testFiles("more.", 3, ".intervals");
|
2011-04-26 08:06:00 +08:00
|
|
|
List<GenomeLoc> locs = getLocs("chr1", "chr2");
|
|
|
|
|
IntervalUtils.splitFixedIntervals(locs, files.size());
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
@Test(expectedExceptions=UserException.BadArgumentValue.class)
|
|
|
|
|
public void testScatterFixedIntervalsMoreFiles() {
|
|
|
|
|
List<File> files = testFiles("more.", 3, ".intervals");
|
|
|
|
|
List<GenomeLoc> locs = getLocs("chr1", "chr2");
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, locs.size()); // locs.size() instead of files.size()
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2011-04-26 08:06:00 +08:00
|
|
|
}
|
2010-11-13 04:14:28 +08:00
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterFixedIntervalsStart() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2");
|
|
|
|
|
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
List<GenomeLoc> locs = getLocs(intervals);
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2011-04-26 08:06:00 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2011-04-26 08:06:00 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1a);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr1b);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterFixedIntervalsMiddle() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2");
|
|
|
|
|
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
2011-04-26 08:06:00 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs = getLocs(intervals);
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2011-04-26 08:06:00 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2011-04-26 08:06:00 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2a);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr2b);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterFixedIntervalsEnd() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2");
|
|
|
|
|
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2");
|
|
|
|
|
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5");
|
2011-04-26 08:06:00 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs = getLocs(intervals);
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs1.get(1), chr2);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr3a);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3b);
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
@Test
|
|
|
|
|
public void testScatterFixedIntervalsFile() {
|
2010-11-23 06:59:42 +08:00
|
|
|
List<File> files = testFiles("sg.", 20, ".intervals");
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(BaseTest.GATKDataLocation + "whole_exome_agilent_designed_120.targets.hg18.chr20.interval_list"), false);
|
2011-04-26 08:06:00 +08:00
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
|
|
|
|
|
|
|
|
|
int[] counts = {
|
|
|
|
|
5169, 5573, 10017, 10567, 10551,
|
|
|
|
|
5087, 4908, 10120, 10435, 10399,
|
|
|
|
|
5391, 4735, 10621, 10352, 10654,
|
|
|
|
|
5227, 5256, 10151, 9649, 9825
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//String splitCounts = "";
|
|
|
|
|
for (int lastIndex = 0, i = 0; i < splits.size(); i++) {
|
|
|
|
|
int splitIndex = splits.get(i);
|
|
|
|
|
int splitCount = (splitIndex - lastIndex);
|
|
|
|
|
//splitCounts += ", " + splitCount;
|
|
|
|
|
lastIndex = splitIndex;
|
|
|
|
|
Assert.assertEquals(splitCount, counts[i], "Num intervals in split " + i);
|
|
|
|
|
}
|
|
|
|
|
//System.out.println(splitCounts.substring(2));
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
2011-04-26 08:06:00 +08:00
|
|
|
|
|
|
|
|
int locIndex = 0;
|
|
|
|
|
for (int i = 0; i < files.size(); i++) {
|
|
|
|
|
String file = files.get(i).toString();
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> parsedLocs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(file), false);
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(parsedLocs.size(), counts[i], "Intervals in " + file);
|
|
|
|
|
for (GenomeLoc parsedLoc: parsedLocs)
|
|
|
|
|
Assert.assertEquals(parsedLoc, locs.get(locIndex), String.format("Genome loc %d from file %d", locIndex++, i));
|
|
|
|
|
}
|
|
|
|
|
Assert.assertEquals(locIndex, locs.size(), "Total number of GenomeLocs");
|
2010-11-23 06:59:42 +08:00
|
|
|
}
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
@Test
|
|
|
|
|
public void testScatterFixedIntervalsMax() {
|
|
|
|
|
List<File> files = testFiles("sg.", 85, ".intervals");
|
|
|
|
|
List<Integer> splits = IntervalUtils.splitFixedIntervals(hg19ReferenceLocs, files.size());
|
|
|
|
|
IntervalUtils.scatterFixedIntervals(hg19Header, hg19ReferenceLocs, splits, files);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < files.size(); i++) {
|
|
|
|
|
String file = files.get(i).toString();
|
|
|
|
|
List<GenomeLoc> parsedLocs = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(file), false);
|
|
|
|
|
Assert.assertEquals(parsedLocs.size(), 1, "parsedLocs[" + i + "].size()");
|
|
|
|
|
Assert.assertEquals(parsedLocs.get(0), hg19ReferenceLocs.get(i), "parsedLocs[" + i + "].get()");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsOrder() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<String> intervals = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
2011-04-26 08:06:00 +08:00
|
|
|
Assert.assertEquals(locs1.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr1);
|
2010-11-13 04:14:28 +08:00
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsBasic() {
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_basic.", 3, ".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3"), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsLessFiles() {
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_less.", 3, ".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2", "chr3", "chr4"), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr4);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions=UserException.BadArgumentValue.class)
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsMoreFiles() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<File> files = testFiles("contig_more.", 3, ".intervals");
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs("chr1", "chr2"), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsStart() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2");
|
|
|
|
|
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_start.", 3, ".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1a);
|
|
|
|
|
Assert.assertEquals(locs1.get(1), chr1b);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsMiddle() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2");
|
|
|
|
|
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5");
|
|
|
|
|
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_middle.", 3, ".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2a);
|
|
|
|
|
Assert.assertEquals(locs2.get(1), chr2b);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
2011-04-26 08:06:00 +08:00
|
|
|
public void testScatterContigIntervalsEnd() {
|
2010-11-13 04:14:28 +08:00
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
|
2011-05-05 04:49:35 +08:00
|
|
|
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2");
|
|
|
|
|
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2");
|
|
|
|
|
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5");
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_end.", 3 ,".intervals");
|
|
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
IntervalUtils.scatterContigIntervals(hg18Header, getLocs(intervals), files);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3a);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr3b);
|
|
|
|
|
}
|
|
|
|
|
|
2011-05-05 11:32:35 +08:00
|
|
|
@Test
|
|
|
|
|
public void testScatterContigIntervalsMax() {
|
|
|
|
|
List<File> files = testFiles("sg.", 85, ".intervals");
|
|
|
|
|
IntervalUtils.scatterContigIntervals(hg19Header, hg19ReferenceLocs, files);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < files.size(); i++) {
|
|
|
|
|
String file = files.get(i).toString();
|
|
|
|
|
List<GenomeLoc> parsedLocs = IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(file), false);
|
|
|
|
|
Assert.assertEquals(parsedLocs.size(), 1, "parsedLocs[" + i + "].size()");
|
|
|
|
|
Assert.assertEquals(parsedLocs.get(0), hg19ReferenceLocs.get(i), "parsedLocs[" + i + "].get()");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
private List<File> testFiles(String prefix, int count, String suffix) {
|
2011-01-25 12:11:49 +08:00
|
|
|
ArrayList<File> files = new ArrayList<File>();
|
|
|
|
|
for (int i = 1; i <= count; i++) {
|
|
|
|
|
files.add(createTempFile(prefix + i, suffix));
|
2010-11-23 06:59:42 +08:00
|
|
|
}
|
2011-01-25 12:11:49 +08:00
|
|
|
return files;
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
2011-04-07 05:12:05 +08:00
|
|
|
|
|
|
|
|
@DataProvider(name="unmergedIntervals")
|
|
|
|
|
public Object[][] getUnmergedIntervals() {
|
|
|
|
|
return new Object[][] {
|
|
|
|
|
new Object[] {"small_unmerged_picard_intervals.list"},
|
|
|
|
|
new Object[] {"small_unmerged_gatk_intervals.list"}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(dataProvider="unmergedIntervals")
|
|
|
|
|
public void testUnmergedIntervals(String unmergedIntervals) {
|
2011-05-05 04:49:35 +08:00
|
|
|
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Collections.singletonList(validationDataLocation + unmergedIntervals), false);
|
2011-04-07 05:12:05 +08:00
|
|
|
Assert.assertEquals(locs.size(), 2);
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
|
2011-04-07 05:12:05 +08:00
|
|
|
Assert.assertEquals(merged.size(), 1);
|
|
|
|
|
}
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|