From 7c5cdb51c222a2b297e5a10724675394de5826fb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 14 Mar 2012 17:26:06 -0400 Subject: [PATCH] UnitTests for ActivityProfile and minor ART cleanup -- TODO for ryan -- there are bugs in ActivityProfile code that I cannot fix right now :-( -- UnitTesting framework for ActivityProfile -- needs to be expanded -- Minor helper functions for ActiveRegion to help with unit tests --- .../traversals/TraverseActiveRegions.java | 1 + .../utils/activeregion/ActiveRegion.java | 14 ++ .../utils/activeregion/ActivityProfile.java | 8 +- .../activeregion/ActivityProfileUnitTest.java | 149 ++++++++++++++++++ 4 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index ff376fcd2..f9a185650 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -179,6 +179,7 @@ public class TraverseActiveRegions extends TraversalEngine walker, T sum, final int minStart, final String currentContig ) { // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them + // TODO can implement parallel traversal here while( workQueue.peek() != null ) { final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc(); if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) { diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index c2e69ee2d..37822dc84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -34,6 +34,11 @@ public class ActiveRegion implements HasGenomeLocation { fullExtentReferenceLoc = extendedLoc; } + @Override + public String toString() { + return "ActiveRegion " + activeRegionLoc.toString(); + } + // add each read to the bin and extend the reference genome activeRegionLoc if needed public void add( final GATKSAMRecord read ) { fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) ); @@ -78,4 +83,13 @@ public class ActiveRegion implements HasGenomeLocation { public void clearReads() { reads.clear(); } public void remove( final GATKSAMRecord read ) { reads.remove( read ); } public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } + + public boolean equalExceptReads(final ActiveRegion other) { + if ( ! activeRegionLoc.equals(other.activeRegionLoc)) return false; + if ( isActive != other.isActive ) return false; + if ( genomeLocParser != other.genomeLocParser ) return false; + if ( extension != other.extension ) return false; + if ( ! extendedLoc.equals(other.extendedLoc) ) return false; + return true; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 14ab97ee4..79b17cdba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.activeregion; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.Arrays; @@ -45,6 +46,8 @@ public class ActivityProfile { GenomeLoc regionStartLoc = null; final List isActiveList; + private GenomeLoc lastLoc = null; + // todo -- add upfront the start and stop of the intervals // todo -- check that no regions are unexpectedly missing // todo -- add unit tests @@ -61,7 +64,10 @@ public class ActivityProfile { } public void add(final GenomeLoc loc, final double score) { - // todo -- test for validity + if ( loc.size() != 1 ) + throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); + if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) + throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); isActiveList.add(score); if( regionStartLoc == null ) { regionStartLoc = loc; diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java new file mode 100644 index 000000000..e6d0322c0 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.activeregion; + + +// the imports for unit testing. + + +import net.sf.picard.reference.ReferenceSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.recalibration.QualQuantizer; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class ActivityProfileUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + private GenomeLoc startLoc; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100); + } + + // -------------------------------------------------------------------------------- + // + // Basic tests Provider + // + // -------------------------------------------------------------------------------- + + private class BasicActivityProfileTestProvider extends TestDataProvider { + List probs; + List expectedRegions; + int extension = 0; + GenomeLoc regionStart = startLoc; + + public BasicActivityProfileTestProvider(final List probs, final List expectedRegions) { + super(BasicActivityProfileTestProvider.class); + this.probs = probs; + this.expectedRegions = expectedRegions; + setName(getName()); + } + + public BasicActivityProfileTestProvider(final List probs, boolean startActive, int ... startsAndStops) { + super(BasicActivityProfileTestProvider.class); + this.probs = probs; + this.expectedRegions = toRegions(startActive, startsAndStops); + setName(getName()); + } + + private String getName() { + return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions)); + } + + private List toRegions(boolean isActive, int[] startsAndStops) { + List l = new ArrayList(); + for ( int i = 0; i < startsAndStops.length - 1; i++) { + int start = regionStart.getStart() + startsAndStops[i]; + int end = regionStart.getStart() + startsAndStops[i+1] - 1; + GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); + ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); + l.add(r); + isActive = ! isActive; + } + return l; + } + } + + @DataProvider(name = "BasicActivityProfileTestProvider") + public Object[][] makeQualIntervalTestProvider() { + new BasicActivityProfileTestProvider(Arrays.asList(1.0), true, 0, 1); + // TODO -- RYAN THESE ALL EXHIBIT AN OFF-BY-ONE ERROR. SORRY I HAVE TO GO BUT I CANNOT FIX NOW + //new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0), true, 0, 1, 2); + //new BasicActivityProfileTestProvider(Arrays.asList(0.0, 1.0), false, 0, 1, 2); + //new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + + return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); + } + + @Test(dataProvider = "BasicActivityProfileTestProvider") + public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { + ActivityProfile profile = new ActivityProfile(genomeLocParser, false); + + Assert.assertEquals(profile.parser, genomeLocParser); + + for ( int i = 0; i < cfg.probs.size(); i++ ) { + double p = cfg.probs.get(i); + GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); + profile.add(loc, p); + } + Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); + + Assert.assertEquals(profile.size(), cfg.probs.size()); + Assert.assertEquals(profile.isActiveList, cfg.probs); + + assertRegionsAreEqual(profile.createActiveRegions(0), cfg.expectedRegions); + } + + private void assertRegionsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i))); + } + } + + // todo -- test extensions +} \ No newline at end of file