Add ActiveRegion tests and refactor
This commit is contained in:
parent
e8defcb20d
commit
3fa3b00f4a
|
|
@ -28,20 +28,23 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 11/13/12
|
||||
* Time: 2:47 PM
|
||||
*
|
||||
* Test the Active Region Traversal Contract
|
||||
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
|
||||
*/
|
||||
public class TraverseActiveRegionsTest extends BaseTest {
|
||||
|
||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||
public List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
|
||||
|
||||
public DummyActiveRegionWalker() {
|
||||
this.prob = 1.0;
|
||||
|
|
@ -55,6 +58,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
mappedActiveRegions.add(activeRegion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -73,7 +77,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private DummyActiveRegionWalker walker;
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
|
|
@ -83,61 +86,133 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testAllBasesSeenSuite() {
|
||||
public void testAllBasesSeen() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
|
||||
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
intervals.add(interval);
|
||||
testAllBasesSeen(intervals);
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1));
|
||||
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
|
||||
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
|
||||
verifyEqualIntervals(intervals, activeIntervals);
|
||||
|
||||
interval = genomeLocParser.createGenomeLoc("1", 10, 20);
|
||||
intervals.add(interval);
|
||||
testAllBasesSeen(intervals);
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
activeIntervals = getIsActiveIntervals(walker, intervals);
|
||||
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
|
||||
verifyEqualIntervals(intervals, activeIntervals);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
public void testAllBasesSeen(List<GenomeLoc> intervals) {
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
activeIntervals.addAll(walker.isActiveCalls);
|
||||
}
|
||||
|
||||
boolean allBasesSeen = true;
|
||||
for (GenomeLoc base : toBases(intervals)) {
|
||||
boolean thisBaseSeen = false;
|
||||
for (GenomeLoc activeLoc : activeIntervals) {
|
||||
if (base.equals(activeLoc)) {
|
||||
thisBaseSeen = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!thisBaseSeen) {
|
||||
allBasesSeen = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile");
|
||||
return activeIntervals;
|
||||
}
|
||||
|
||||
private List<GenomeLoc> toBases(List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
|
||||
@Test
|
||||
public void testActiveRegionCoverage() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
||||
List<ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
verifyActiveRegionCoverage(intervals, activeRegions);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, List<ActiveRegion> activeRegions) {
|
||||
List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
|
||||
|
||||
for (GenomeLoc interval : intervals) {
|
||||
if (interval.size() == 1)
|
||||
bases.add(interval);
|
||||
else {
|
||||
for (int location = interval.getStart(); location <= interval.getStop(); location++) {
|
||||
bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
|
||||
}
|
||||
}
|
||||
intervalStarts.add(interval.getStartLocation());
|
||||
intervalStops.add(interval.getStopLocation());
|
||||
}
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
for (ActiveRegion activeRegion : activeRegions) {
|
||||
for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
|
||||
// Contract: Regions do not overlap
|
||||
Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
|
||||
baseRegionMap.put(activeLoc, activeRegion);
|
||||
}
|
||||
|
||||
GenomeLoc start = activeRegion.getLocation().getStartLocation();
|
||||
if (intervalStarts.contains(start))
|
||||
intervalStarts.remove(start);
|
||||
|
||||
GenomeLoc stop = activeRegion.getLocation().getStopLocation();
|
||||
if (intervalStops.contains(stop))
|
||||
intervalStops.remove(stop);
|
||||
}
|
||||
|
||||
for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
|
||||
// Contract: Each location in the interval(s) is in exactly one region
|
||||
// Contract: The total set of regions exactly matches the analysis interval(s)
|
||||
Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
|
||||
baseRegionMap.remove(baseLoc);
|
||||
}
|
||||
|
||||
// Contract: The total set of regions exactly matches the analysis interval(s)
|
||||
Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");
|
||||
|
||||
// Contract: All explicit interval boundaries must also be region boundaries
|
||||
Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
|
||||
Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
|
||||
}
|
||||
|
||||
private List<ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
return walker.mappedActiveRegions;
|
||||
}
|
||||
|
||||
private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
|
||||
List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
|
||||
if (interval.size() == 1)
|
||||
bases.add(interval);
|
||||
else {
|
||||
for (int location = interval.getStart(); location <= interval.getStop(); location++)
|
||||
bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
|
||||
}
|
||||
|
||||
return bases;
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
|
||||
walker = new DummyActiveRegionWalker();
|
||||
private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
|
||||
Set<GenomeLoc> bases = new TreeSet<GenomeLoc>(); // for sorting and uniqueness
|
||||
for (GenomeLoc interval : intervals)
|
||||
bases.addAll(toSingleBaseLocs(interval));
|
||||
|
||||
return bases;
|
||||
}
|
||||
|
||||
private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
|
||||
Collection<GenomeLoc> aBases = toSingleBaseLocs(aIntervals);
|
||||
Collection<GenomeLoc> bBases = toSingleBaseLocs(bIntervals);
|
||||
|
||||
Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size());
|
||||
|
||||
Iterator<GenomeLoc> aIter = aBases.iterator();
|
||||
Iterator<GenomeLoc> bIter = bBases.iterator();
|
||||
while (aIter.hasNext() && bIter.hasNext()) {
|
||||
GenomeLoc aLoc = aIter.next();
|
||||
GenomeLoc bLoc = bIter.next();
|
||||
Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc);
|
||||
}
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine);
|
||||
|
|
@ -146,8 +221,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||
|
||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
for (WindowMaker.WindowMakerIterator window : windowMaker) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue