2010-11-13 04:14:28 +08:00
|
|
|
package org.broadinstitute.sting.utils.interval;
|
|
|
|
|
|
|
|
|
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
|
|
|
import org.broadinstitute.sting.BaseTest;
|
|
|
|
|
import org.testng.Assert;
|
|
|
|
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2010-12-06 02:29:39 +08:00
|
|
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
import org.testng.annotations.BeforeClass;
|
2011-04-07 05:12:05 +08:00
|
|
|
import org.testng.annotations.DataProvider;
|
2010-11-13 04:14:28 +08:00
|
|
|
import org.testng.annotations.Test;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
2011-01-03 10:17:25 +08:00
|
|
|
import java.io.FileNotFoundException;
|
2011-04-21 03:22:21 +08:00
|
|
|
import java.util.*;
|
2010-11-13 04:14:28 +08:00
|
|
|
|
|
|
|
|
/**
 * Unit tests for the interval utility methods in {@link IntervalUtils}.
 */
|
|
|
|
|
public class IntervalUtilsUnitTest extends BaseTest {
|
|
|
|
|
// used to seed the genome loc parser with a sequence dictionary
|
|
|
|
|
private static File reference = new File(BaseTest.hg18Reference);
|
|
|
|
|
private GenomeLocParser genomeLocParser;
|
|
|
|
|
|
|
|
|
|
@BeforeClass
|
|
|
|
|
public void init() {
|
2011-01-03 10:17:25 +08:00
|
|
|
try {
|
|
|
|
|
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(reference);
|
|
|
|
|
genomeLocParser = new GenomeLocParser(seq);
|
|
|
|
|
}
|
|
|
|
|
catch(FileNotFoundException ex) {
|
|
|
|
|
throw new UserException.CouldNotReadInputFile(reference,ex);
|
|
|
|
|
}
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
2010-11-30 13:49:20 +08:00
|
|
|
@Test(expectedExceptions=UserException.class)
|
2010-11-13 04:14:28 +08:00
|
|
|
public void testMergeListsBySetOperatorNoOverlap() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromOne = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 2 == 0)
|
|
|
|
|
listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
else
|
|
|
|
|
listEveryTwoFromOne.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 100);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testMergeListsBySetOperatorAllOverlap() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 2 == 0)
|
|
|
|
|
listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 150);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 50);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testMergeListsBySetOperator() {
|
|
|
|
|
// a couple of lists we'll use for the testing
|
|
|
|
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
|
|
|
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
|
|
|
|
|
|
|
|
|
// create the two lists we'll use
|
|
|
|
|
for (int x = 1; x < 101; x++) {
|
|
|
|
|
if (x % 5 == 0) {
|
|
|
|
|
listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 40);
|
|
|
|
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
|
|
|
|
Assert.assertEquals(ret.size(), 20);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testCountContigs() {
|
|
|
|
|
List<String> chrs = new ArrayList<String>();
|
|
|
|
|
for (int i = 1; i <= 22; i++)
|
|
|
|
|
chrs.add("chr" + i);
|
|
|
|
|
chrs.add("chrX");
|
|
|
|
|
chrs.add("chrY");
|
|
|
|
|
|
|
|
|
|
List<String> chrsNoRandom = Arrays.asList("chr12", "chr14", "chr20", "chrY");
|
|
|
|
|
List<String> chrsWithRandom = new ArrayList<String>();
|
|
|
|
|
chrsWithRandom.add("chrM");
|
|
|
|
|
chrsWithRandom.addAll(chrs);
|
|
|
|
|
for (String chr: chrs)
|
|
|
|
|
if(!chrsNoRandom.contains(chr))
|
|
|
|
|
chrsWithRandom.add(chr + "_random");
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(IntervalUtils.distinctContigs(reference), chrsWithRandom);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.distinctContigs(reference, Arrays.asList(BaseTest.validationDataLocation + "TCGA-06-0188.interval_list")), chrs);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.distinctContigs(reference, Arrays.asList("chr1:1-1", "chr2:1-1", "chr3:2-2")), Arrays.asList("chr1","chr2","chr3"));
|
|
|
|
|
Assert.assertEquals(IntervalUtils.distinctContigs(reference, Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2")), Arrays.asList("chr1","chr2","chr3"));
|
|
|
|
|
}
|
|
|
|
|
|
2011-04-21 03:22:21 +08:00
|
|
|
@Test
|
|
|
|
|
public void testGetContigLengths() {
|
|
|
|
|
Map<String, Integer> lengths = IntervalUtils.getContigLengths(reference);
|
|
|
|
|
Assert.assertEquals((int)lengths.get("chr1"), 247249719);
|
|
|
|
|
Assert.assertEquals((int)lengths.get("chr2"), 242951149);
|
|
|
|
|
Assert.assertEquals((int)lengths.get("chr3"), 199501827);
|
|
|
|
|
Assert.assertEquals((int)lengths.get("chr20"), 62435964);
|
|
|
|
|
Assert.assertEquals((int)lengths.get("chrX"), 154913754);
|
|
|
|
|
}
|
|
|
|
|
|
2011-01-29 07:33:58 +08:00
|
|
|
@Test
|
|
|
|
|
public void testCountIntervals() {
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Collections.<String>emptyList(), false), 45);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Collections.<String>emptyList(), true), 45);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3"), false), 3);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3"), true), 3);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"), false), 4);
|
|
|
|
|
Assert.assertEquals(IntervalUtils.countIntervalArguments(reference, Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"), true), 3);
|
|
|
|
|
}
|
|
|
|
|
|
2011-02-01 02:07:34 +08:00
|
|
|
@Test
|
|
|
|
|
public void testIsIntervalFile() {
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list"));
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "empty_intervals.list", true));
|
|
|
|
|
|
|
|
|
|
List<String> extensions = Arrays.asList("bed", "interval_list", "intervals", "list", "picard");
|
|
|
|
|
for (String extension: extensions) {
|
|
|
|
|
Assert.assertTrue(IntervalUtils.isIntervalFile("test_intervals." + extension, false), "Tested interval file extension: " + extension);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions = UserException.CouldNotReadInputFile.class)
|
|
|
|
|
public void testMissingIntervalFile() {
|
|
|
|
|
IntervalUtils.isIntervalFile(BaseTest.validationDataLocation + "no_such_intervals.list");
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
@Test
|
|
|
|
|
public void testBasicScatter() {
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("basic.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3"), files, false);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterLessFiles() {
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
GenomeLoc chr4 = genomeLocParser.parseGenomeInterval("chr4");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("less.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3", "chr4"), files, false);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs1.get(1), chr2);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr3);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr4);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions=UserException.BadArgumentValue.class)
|
|
|
|
|
public void testScatterMoreFiles() {
|
|
|
|
|
List<File> files = testFiles("more.", 3, ".intervals");
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2"), files, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterIntervals() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
|
|
|
|
GenomeLoc chr1a = genomeLocParser.parseGenomeInterval("chr1:1-2");
|
|
|
|
|
GenomeLoc chr1b = genomeLocParser.parseGenomeInterval("chr1:4-5");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3:2-2");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, intervals, files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1a);
|
|
|
|
|
Assert.assertEquals(locs1.get(1), chr1b);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-23 06:59:42 +08:00
|
|
|
@Test(enabled=false) // disabled, GenomeLoc.compareTo() returns 0 for two locs with the same start, causing an exception in GLSS.add().
|
|
|
|
|
public void testScatterIntervalsWithTheSameStart() {
|
|
|
|
|
List<File> files = testFiles("sg.", 20, ".intervals");
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(new File(hg18Reference), Arrays.asList(BaseTest.GATKDataLocation + "whole_exome_agilent_designed_120.targets.hg18.chr20.interval_list"), files, false);
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
@Test
|
|
|
|
|
public void testScatterOrder() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2");
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3:2-2");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("split.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, intervals, files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testBasicScatterByContig() {
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_basic.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3"), files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterByContigLessFiles() {
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3");
|
|
|
|
|
GenomeLoc chr4 = genomeLocParser.parseGenomeInterval("chr4");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_less.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2", "chr3", "chr4"), files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr4);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(expectedExceptions=UserException.BadArgumentValue.class)
|
|
|
|
|
public void testScatterByContigMoreFiles() {
|
|
|
|
|
List<File> files = testFiles("contig_more.", 3, ".intervals");
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, Arrays.asList("chr1", "chr2"), files, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterByContigIntervalsStart() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
|
|
|
|
GenomeLoc chr1a = genomeLocParser.parseGenomeInterval("chr1:1-2");
|
|
|
|
|
GenomeLoc chr1b = genomeLocParser.parseGenomeInterval("chr1:4-5");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2:1-1");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3:2-2");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_start.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, intervals, files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1a);
|
|
|
|
|
Assert.assertEquals(locs1.get(1), chr1b);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterByContigIntervalsMiddle() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2a = genomeLocParser.parseGenomeInterval("chr2:1-2");
|
|
|
|
|
GenomeLoc chr2b = genomeLocParser.parseGenomeInterval("chr2:4-5");
|
|
|
|
|
GenomeLoc chr3 = genomeLocParser.parseGenomeInterval("chr3:2-2");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_middle.", 3, ".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, intervals, files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 2);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 1);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2a);
|
|
|
|
|
Assert.assertEquals(locs2.get(1), chr2b);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testScatterByContigIntervalsEnd() {
|
|
|
|
|
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
|
|
|
|
|
GenomeLoc chr1 = genomeLocParser.parseGenomeInterval("chr1:1-1");
|
|
|
|
|
GenomeLoc chr2 = genomeLocParser.parseGenomeInterval("chr2:2-2");
|
|
|
|
|
GenomeLoc chr3a = genomeLocParser.parseGenomeInterval("chr3:1-2");
|
|
|
|
|
GenomeLoc chr3b = genomeLocParser.parseGenomeInterval("chr3:4-5");
|
|
|
|
|
|
|
|
|
|
List<File> files = testFiles("contig_split_end.", 3 ,".intervals");
|
|
|
|
|
|
|
|
|
|
IntervalUtils.scatterIntervalArguments(reference, intervals, files, true);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
|
|
|
|
List<GenomeLoc> locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser, Arrays.asList(files.get(2).toString()), false);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs2.size(), 1);
|
|
|
|
|
Assert.assertEquals(locs3.size(), 2);
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals(locs1.get(0), chr1);
|
|
|
|
|
Assert.assertEquals(locs2.get(0), chr2);
|
|
|
|
|
Assert.assertEquals(locs3.get(0), chr3a);
|
|
|
|
|
Assert.assertEquals(locs3.get(1), chr3b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private List<File> testFiles(String prefix, int count, String suffix) {
|
2011-01-25 12:11:49 +08:00
|
|
|
ArrayList<File> files = new ArrayList<File>();
|
|
|
|
|
for (int i = 1; i <= count; i++) {
|
|
|
|
|
files.add(createTempFile(prefix + i, suffix));
|
2010-11-23 06:59:42 +08:00
|
|
|
}
|
2011-01-25 12:11:49 +08:00
|
|
|
return files;
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
2011-04-07 05:12:05 +08:00
|
|
|
|
|
|
|
|
@DataProvider(name="unmergedIntervals")
|
|
|
|
|
public Object[][] getUnmergedIntervals() {
|
|
|
|
|
return new Object[][] {
|
|
|
|
|
new Object[] {"small_unmerged_picard_intervals.list"},
|
|
|
|
|
new Object[] {"small_unmerged_gatk_intervals.list"}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test(dataProvider="unmergedIntervals")
|
|
|
|
|
public void testUnmergedIntervals(String unmergedIntervals) {
|
|
|
|
|
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(genomeLocParser, Collections.singletonList(validationDataLocation + unmergedIntervals), false);
|
|
|
|
|
Assert.assertEquals(locs.size(), 2);
|
|
|
|
|
|
|
|
|
|
List<GenomeLoc> merged = genomeLocParser.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
|
|
|
|
|
Assert.assertEquals(merged.size(), 1);
|
|
|
|
|
}
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|