Add ActiveRegion tests and refactor

This commit is contained in:
Joel Thibault 2012-11-19 14:45:19 -05:00
parent e8defcb20d
commit 3fa3b00f4a
1 changed file with 115 additions and 41 deletions

View File

@ -28,20 +28,23 @@ import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.ArrayList; import java.util.*;
import java.util.List;
/** /**
* Created with IntelliJ IDEA. * Created with IntelliJ IDEA.
* User: thibault * User: thibault
* Date: 11/13/12 * Date: 11/13/12
* Time: 2:47 PM * Time: 2:47 PM
*
* Test the Active Region Traversal Contract
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
*/ */
public class TraverseActiveRegionsTest extends BaseTest { public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> { private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob; private final double prob;
public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>(); public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
public List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
public DummyActiveRegionWalker() { public DummyActiveRegionWalker() {
this.prob = 1.0; this.prob = 1.0;
@ -55,6 +58,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
@Override @Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
mappedActiveRegions.add(activeRegion);
return 0; return 0;
} }
@ -73,7 +77,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
private IndexedFastaSequenceFile reference; private IndexedFastaSequenceFile reference;
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
private DummyActiveRegionWalker walker;
@BeforeClass @BeforeClass
private void init() throws FileNotFoundException { private void init() throws FileNotFoundException {
@ -83,61 +86,133 @@ public class TraverseActiveRegionsTest extends BaseTest {
} }
@Test @Test
public void testAllBasesSeenSuite() { public void testAllBasesSeen() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>(); List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1));
intervals.add(interval); List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
testAllBasesSeen(intervals); // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
interval = genomeLocParser.createGenomeLoc("1", 10, 20); intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
intervals.add(interval); activeIntervals = getIsActiveIntervals(walker, intervals);
testAllBasesSeen(intervals); // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
// TODO: more tests and edge cases
} }
public void testAllBasesSeen(List<GenomeLoc> intervals) { private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>(); List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
t.traverse(walker, dataProvider, 0); t.traverse(walker, dataProvider, 0);
activeIntervals.addAll(walker.isActiveCalls); activeIntervals.addAll(walker.isActiveCalls);
} }
boolean allBasesSeen = true; return activeIntervals;
for (GenomeLoc base : toBases(intervals)) {
boolean thisBaseSeen = false;
for (GenomeLoc activeLoc : activeIntervals) {
if (base.equals(activeLoc)) {
thisBaseSeen = true;
break;
}
}
if (!thisBaseSeen) {
allBasesSeen = false;
break;
}
} }
Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile"); @Test
public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
List<ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
verifyActiveRegionCoverage(intervals, activeRegions);
// TODO: more tests and edge cases
} }
private List<GenomeLoc> toBases(List<GenomeLoc> intervals) { private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, List<ActiveRegion> activeRegions) {
List<GenomeLoc> bases = new ArrayList<GenomeLoc>(); List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
for (GenomeLoc interval : intervals) { for (GenomeLoc interval : intervals) {
intervalStarts.add(interval.getStartLocation());
intervalStops.add(interval.getStopLocation());
}
Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
for (ActiveRegion activeRegion : activeRegions) {
for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
// Contract: Regions do not overlap
Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
baseRegionMap.put(activeLoc, activeRegion);
}
GenomeLoc start = activeRegion.getLocation().getStartLocation();
if (intervalStarts.contains(start))
intervalStarts.remove(start);
GenomeLoc stop = activeRegion.getLocation().getStopLocation();
if (intervalStops.contains(stop))
intervalStops.remove(stop);
}
for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
// Contract: Each location in the interval(s) is in exactly one region
// Contract: The total set of regions exactly matches the analysis interval(s)
Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
baseRegionMap.remove(baseLoc);
}
// Contract: The total set of regions exactly matches the analysis interval(s)
Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");
// Contract: All explicit interval boundaries must also be region boundaries
Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
}
/**
 * Traverses every data provider built for the given intervals with the supplied
 * walker and returns the regions the walker recorded.
 *
 * @param walker    the walker passed to each traversal; its {@code mappedActiveRegions}
 *                  list is returned directly (not copied)
 * @param intervals the intervals handed to {@code createDataProviders}
 * @return the walker's {@code mappedActiveRegions} list after all traversals have run
 */
private List<ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
    final List<LocusShardDataProvider> providers = createDataProviders(intervals);
    for (final LocusShardDataProvider provider : providers) {
        t.traverse(walker, provider, 0);
    }
    return walker.mappedActiveRegions;
}
private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
if (interval.size() == 1) if (interval.size() == 1)
bases.add(interval); bases.add(interval);
else { else {
for (int location = interval.getStart(); location <= interval.getStop(); location++) { for (int location = interval.getStart(); location <= interval.getStop(); location++)
bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
} }
}
}
return bases; return bases;
} }
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) { private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
walker = new DummyActiveRegionWalker(); Set<GenomeLoc> bases = new TreeSet<GenomeLoc>(); // for sorting and uniqueness
for (GenomeLoc interval : intervals)
bases.addAll(toSingleBaseLocs(interval));
return bases;
}
/**
 * Asserts that two interval lists cover the same single-base locations.
 *
 * Both lists are expanded with {@code toSingleBaseLocs}; the resulting collections
 * must have the same size and match element by element in iteration order.
 *
 * @param aIntervals the first interval list
 * @param bIntervals the second interval list
 */
private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
    final Collection<GenomeLoc> firstBases = toSingleBaseLocs(aIntervals);
    final Collection<GenomeLoc> secondBases = toSingleBaseLocs(bIntervals);

    final int firstCount = firstBases.size();
    final int secondCount = secondBases.size();
    Assert.assertTrue(firstCount == secondCount, "Interval lists have a differing number of bases: " + firstCount + " vs. " + secondCount);

    // Walk both collections in lock-step, stopping as soon as either side runs out.
    for (Iterator<GenomeLoc> firstIter = firstBases.iterator(), secondIter = secondBases.iterator();
         firstIter.hasNext() && secondIter.hasNext(); ) {
        final GenomeLoc firstLoc = firstIter.next();
        final GenomeLoc secondLoc = secondIter.next();
        final boolean sameLocation = firstLoc.equals(secondLoc);
        Assert.assertTrue(sameLocation, "Interval locations do not match: " + firstLoc + " vs. " + secondLoc);
    }
}
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser); engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine); t.initialize(engine);
@ -146,8 +221,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
Shard shard = new MockLocusShard(genomeLocParser, intervals); Shard shard = new MockLocusShard(genomeLocParser, intervals);
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>(); List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs()); for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
for (WindowMaker.WindowMakerIterator window : windowMaker) {
providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>())); providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
} }