diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index d61da5a83..ce4d400b4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -28,20 +28,23 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * Created with IntelliJ IDEA. * User: thibault * Date: 11/13/12 * Time: 2:47 PM + * + * Test the Active Region Traversal Contract + * http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract */ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; public List isActiveCalls = new ArrayList(); + public List mappedActiveRegions = new ArrayList(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -55,6 +58,7 @@ public class TraverseActiveRegionsTest extends BaseTest { @Override public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + mappedActiveRegions.add(activeRegion); return 0; } @@ -73,7 +77,6 @@ public class TraverseActiveRegionsTest extends BaseTest { private IndexedFastaSequenceFile reference; private GenomeLocParser genomeLocParser; - private DummyActiveRegionWalker walker; @BeforeClass private void init() throws FileNotFoundException { @@ -83,61 +86,133 @@ public class TraverseActiveRegionsTest extends BaseTest { } @Test - public void testAllBasesSeenSuite() { + public void testAllBasesSeen() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); List intervals = new ArrayList(); - List activeIntervals = new ArrayList(); - GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); - intervals.add(interval); - testAllBasesSeen(intervals); + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1)); + List activeIntervals = getIsActiveIntervals(walker, intervals); + // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call + verifyEqualIntervals(intervals, activeIntervals); - interval = genomeLocParser.createGenomeLoc("1", 10, 20); - intervals.add(interval); - testAllBasesSeen(intervals); + intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); + activeIntervals = getIsActiveIntervals(walker, intervals); + // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call + verifyEqualIntervals(intervals, activeIntervals); + + // TODO: more tests and edge cases } - public void testAllBasesSeen(List intervals) { + private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { List activeIntervals = new ArrayList(); for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { t.traverse(walker, dataProvider, 0); activeIntervals.addAll(walker.isActiveCalls); } - boolean allBasesSeen = true; - for (GenomeLoc base : toBases(intervals)) { - boolean thisBaseSeen = false; - for (GenomeLoc activeLoc : activeIntervals) { - if (base.equals(activeLoc)) { - thisBaseSeen = true; - break; - } - } - if (!thisBaseSeen) { - allBasesSeen = false; - break; - } - } - - Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile"); + return activeIntervals; } - private List toBases(List intervals) { - List bases = new ArrayList(); + @Test + public void testActiveRegionCoverage() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + List intervals = new ArrayList(); + + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + List activeRegions = getActiveRegions(walker, intervals); + verifyActiveRegionCoverage(intervals, activeRegions); + + // TODO: more tests and edge cases + } + + private void verifyActiveRegionCoverage(List intervals, List activeRegions) { + List intervalStarts = new ArrayList(); + List intervalStops = new ArrayList(); + for (GenomeLoc interval : intervals) { - if (interval.size() == 1) - bases.add(interval); - else { - for (int location = interval.getStart(); location <= interval.getStop(); location++) { - bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); - } - } + intervalStarts.add(interval.getStartLocation()); + intervalStops.add(interval.getStopLocation()); } + + Map baseRegionMap = new HashMap(); + + for (ActiveRegion activeRegion : activeRegions) { + for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) { + // Contract: Regions do not overlap + Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region"); + baseRegionMap.put(activeLoc, activeRegion); + } + + GenomeLoc start = activeRegion.getLocation().getStartLocation(); + if (intervalStarts.contains(start)) + intervalStarts.remove(start); + + GenomeLoc stop = activeRegion.getLocation().getStopLocation(); + if (intervalStops.contains(stop)) + intervalStops.remove(stop); + } + + for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) { + // Contract: Each location in the interval(s) is in exactly one region + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region"); + baseRegionMap.remove(baseLoc); + } + + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals"); + + // Contract: All explicit interval boundaries must also be region boundaries + Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location"); + Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location"); + } + + private List getActiveRegions(DummyActiveRegionWalker walker, List intervals) { + for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) + t.traverse(walker, dataProvider, 0); + + return walker.mappedActiveRegions; + } + + private Collection toSingleBaseLocs(GenomeLoc interval) { + List bases = new ArrayList(); + if (interval.size() == 1) + bases.add(interval); + else { + for (int location = interval.getStart(); location <= interval.getStop(); location++) + bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); + } + return bases; } - private List createDataProviders(List intervals) { - walker = new DummyActiveRegionWalker(); + private Collection toSingleBaseLocs(List intervals) { + Set bases = new TreeSet(); // for sorting and uniqueness + for (GenomeLoc interval : intervals) + bases.addAll(toSingleBaseLocs(interval)); + return bases; + } + + private void verifyEqualIntervals(List aIntervals, List bIntervals) { + Collection aBases = toSingleBaseLocs(aIntervals); + Collection bBases = toSingleBaseLocs(bIntervals); + + Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size()); + + Iterator aIter = aBases.iterator(); + Iterator bIter = bBases.iterator(); + while (aIter.hasNext() && bIter.hasNext()) { + GenomeLoc aLoc = aIter.next(); + GenomeLoc bLoc = bIter.next(); + Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc); + } + } + + private List createDataProviders(List intervals) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); @@ -146,8 +221,7 @@ public class TraverseActiveRegionsTest extends BaseTest { Shard shard = new MockLocusShard(genomeLocParser, intervals); List providers = new ArrayList(); - WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs()); - for (WindowMaker.WindowMakerIterator window : windowMaker) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) { providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList())); }