diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 74e28db63..0aa946d67 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "86ceec507e70d542decdae1d20ed6f82"); + "f751363288740c6fd9179a487be61fb4"); } private void HCTestComplexGGA(String bam, String args, String md5) { @@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "76d4c4a112cf60080adf74c3e116d1fb"); + "262e4c9a55baf1936a65612cfb1f6f81"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "23a4bfa0300683d8cf2ec16ce96e89ad"); + "71ef8d0217c1a73dd360413dccd05f4d"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a77ac53d67937feebfba22a9336a5421")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a4e74226b16a7d8c5999620c2f6be1ba")); executeTest("HCTestStructuralIndels: ", spec); } @@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("255947f39455c87c561be4aee4cab651")); + Arrays.asList("87bd7ac2f7d65580838c7c956ccf52b7")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 5d2aa6be3..64c6d5094 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -109,7 +109,8 @@ public class TraverseActiveRegions extends TraversalEngine * Class GenomeLocCollection *

@@ -59,6 +43,10 @@ import java.util.*; * will also remove a region from the list, if the region to remove is a * partial interval of a region in the collection it will remove the region from * that element. + * + * @author aaron + * Date: May 22, 2009 + * Time: 10:54:40 AM */ public class GenomeLocSortedSet extends AbstractSet { private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class); @@ -66,26 +54,47 @@ public class GenomeLocSortedSet extends AbstractSet { private GenomeLocParser genomeLocParser; // our private storage for the GenomeLoc's - private List mArray = new ArrayList(); + private final List mArray = new ArrayList(); // cache this to make overlap checking much more efficient private int previousOverlapSearchIndex = -1; - /** default constructor */ - public GenomeLocSortedSet(GenomeLocParser parser) { + /** + * Create a new, empty GenomeLocSortedSet + * + * @param parser a non-null the parser we use to create genome locs + */ + public GenomeLocSortedSet(final GenomeLocParser parser) { + if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); this.genomeLocParser = parser; } - public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) { + /** + * Create a new GenomeLocSortedSet containing location e + * + * @param parser a non-null the parser we use to create genome locs + * @param e a single genome locs to add to this set + */ + public GenomeLocSortedSet(final GenomeLocParser parser, final GenomeLoc e) { this(parser); add(e); } - public GenomeLocSortedSet(GenomeLocParser parser,Collection l) { + /** + * Create a new GenomeLocSortedSet containing locations l + * + * The elements in l can be in any order, and can be overlapping. They will be sorted first and + * overlapping (but not contiguous) elements will be merged + * + * @param parser a non-null the parser we use to create genome locs + * @param l a collection of genome locs to add to this set + */ + public GenomeLocSortedSet(final GenomeLocParser parser, final Collection l) { this(parser); - for ( GenomeLoc e : l ) - add(e); + final ArrayList sorted = new ArrayList(l); + Collections.sort(sorted); + mArray.addAll(IntervalUtils.mergeIntervalLocations(sorted, IntervalMergingRule.OVERLAPPING_ONLY)); } /** @@ -198,9 +207,72 @@ public class GenomeLocSortedSet extends AbstractSet { return returnValue; } + /** + * Return a list of intervals overlapping loc + * + * @param loc the location we want overlapping intervals + * @return a non-null list of locations that overlap loc + */ + public List getOverlapping(final GenomeLoc loc) { + // the max ensures that if loc would be the first element, that we start searching at the first element + final int index = Collections.binarySearch(mArray, loc); + if ( index >= 0 ) + // we can safely return a singleton because overlapping regions are merged and loc is exactly in + // the set already + return Collections.singletonList(loc); + + // if loc isn't in the list index is (-(insertion point) - 1). The insertion point is defined as the point at + // which the key would be inserted into the list: the index of the first element greater than the key, or list.size() + // -ins - 1 = index => -ins = index + 1 => ins = -(index + 1) + // Note that we look one before the index in this case, as loc might occur after the previous overlapping interval + final int start = Math.max(-(index + 1) - 1, 0); + final int size = mArray.size(); + + final List overlapping = new LinkedList(); + for ( int i = start; i < size; i++ ) { + final GenomeLoc myLoc = mArray.get(i); + if ( loc.overlapsP(myLoc) ) + overlapping.add(myLoc); + else if ( myLoc.isPast(loc) ) + // since mArray is ordered, if myLoc is past loc that means all future + // intervals cannot overlap loc either. So we can safely abort the search + // note that we need to be a bit conservative on our tests since index needs to start + // at -1 the position of index, so it's possible that myLoc and loc don't overlap but the next + // position might + break; + } + + return overlapping; + } + + /** + * Return a list of intervals overlapping loc by enumerating all locs and testing for overlap + * + * Purely for testing purposes -- this is way to slow for any production code + * + * @param loc the location we want overlapping intervals + * @return a non-null list of locations that overlap loc + */ + protected List getOverlappingFullSearch(final GenomeLoc loc) { + final List overlapping = new LinkedList(); + + // super slow, but definitely works + for ( final GenomeLoc myLoc : mArray ) { + if ( loc.overlapsP(myLoc) ) + overlapping.add(myLoc); + } + + return overlapping; + } + /** * add a genomeLoc to the collection, simply inserting in order into the set * + * TODO -- this may break the contract of the GenomeLocSortedSet if e overlaps or + * TODO -- other locations already in the set. This code should check to see if + * TODO -- e is overlapping with its nearby elements and merge them or alternatively + * TODO -- throw an exception + * * @param e the GenomeLoc to add * * @return true @@ -225,6 +297,11 @@ public class GenomeLocSortedSet extends AbstractSet { * Adds a GenomeLoc to the collection, merging it if it overlaps another region. * If it's not overlapping then we add it in sorted order. * + * TODO TODO TODO -- this function is buggy and will not properly create a sorted + * TODO TODO TODO -- genome loc is addRegion is called sequentially where the second + * TODO TODO TODO -- loc added is actually before the first. So when creating + * TODO TODO TODO -- sets make sure to sort the input locations first! + * * @param e the GenomeLoc to add to the collection * * @return true, if the GenomeLoc could be added to the collection @@ -380,31 +457,4 @@ public class GenomeLocSortedSet extends AbstractSet { return s.toString(); } - - /** - * Check to see whether two genomeLocSortedSets are equal. - * Note that this implementation ignores the contigInfo object. - * - */ /* - @Override - public boolean equals(Object other) { - if(other == null) - return false; - if(other instanceof GenomeLocSortedSet) { - // send to a list, so we can ensure order correct - List otherList = ((GenomeLocSortedSet)other).toList(); - List thisList = this.toList(); - if (otherList.size() != this.size()) - return false; - - for (Integer i=0;i splitAndTrimToIntervals(final GenomeLocSortedSet intervals) { + final List allOverlapping = intervals.getOverlapping(getLocation()); + final List clippedRegions = new LinkedList(); + + for ( final GenomeLoc overlapping : allOverlapping ) { + final GenomeLoc subLoc = getLocation().intersect(overlapping); + final int subStart = subLoc.getStart() - getLocation().getStart(); + final int subEnd = subStart + subLoc.size(); + final List subStates = supportingStates.isEmpty() ? supportingStates : supportingStates.subList(subStart, subEnd); + final ActiveRegion clipped = new ActiveRegion( subLoc, subStates, isActive, genomeLocParser, extension ); + clippedRegions.add(clipped); + } + + return clippedRegions; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index ff4673717..25948a857 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -29,6 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import java.util.*; @@ -45,6 +46,7 @@ public class ActivityProfile { protected final List stateList; protected final GenomeLocParser parser; + protected final GenomeLocSortedSet restrictToIntervals; protected GenomeLoc regionStartLoc = null; protected GenomeLoc regionStopLoc = null; @@ -60,10 +62,20 @@ public class ActivityProfile { * @param parser the parser we can use to create genome locs, cannot be null */ public ActivityProfile(final GenomeLocParser parser) { + this(parser, null); + } + + /** + * Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null + * @param parser the parser we can use to create genome locs, cannot be null + * @param intervals only include states that are within these intervals, if not null + */ + public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) { if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); this.parser = parser; this.stateList = new ArrayList(); + this.restrictToIntervals = intervals; } @Override @@ -224,7 +236,7 @@ public class ActivityProfile { if ( position > size() ) // should we allow this? probably not - throw new IllegalArgumentException("Must add state contiguous to existing states"); + throw new IllegalArgumentException("Must add state contiguous to existing states: adding " + stateToAdd); if ( position >= 0 ) { // ignore states starting before this regions start @@ -313,7 +325,10 @@ public class ActivityProfile { if ( nextRegion == null ) return regions; else { - regions.add(nextRegion); + if ( restrictToIntervals == null ) + regions.add(nextRegion); + else + regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals)); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java index abbc74df4..f2bc86dfc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.activeregion; import com.google.java.contract.Ensures; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.MathUtils; import java.util.ArrayList; @@ -53,25 +54,34 @@ public class BandPassActivityProfile extends ActivityProfile { private final double[] GaussianKernel; /** - * Create a new BandPassActivityProfile with default sigma and file sizes - * @param parser our genome loc parser + * Create a new BandPassActivityProfile with default sigma and filter sizes + * + * @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean) */ - public BandPassActivityProfile(final GenomeLocParser parser) { - this(parser, MAX_FILTER_SIZE, DEFAULT_SIGMA, true); + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) { + this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true); + } + + /** + * @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean) + * + * sets adaptiveFilterSize to true + */ + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) { + this(parser, restrictToIntervals, maxFilterSize, sigma, true); } /** * Create an activity profile that implements a band pass filter on the states + * * @param parser our genome loc parser + * @param restrictToIntervals only include states that are within these intervals, if not null * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma * @param sigma the variance of the Gaussian kernel for this band pass filter + * @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size */ - public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) { - this(parser, maxFilterSize, sigma, true); - } - - public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { - super(parser); + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { + super(parser, restrictToIntervals); if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index af22d852b..0b6e08fa7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -30,7 +30,9 @@ import org.broadinstitute.sting.commandline.ArgumentException; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.walkers.readutils.PrintReads; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.annotations.Test; import java.io.File; @@ -70,10 +72,12 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { @Test public void testEmptyIntervalSetHandling() throws Exception { + GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary()); + GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); testEngine.setWalker(new PrintReads()); - testEngine.setIntervals(new GenomeLocSortedSet(null)); + testEngine.setIntervals(new GenomeLocSortedSet(genomeLocParser)); testEngine.validateSuppliedIntervals(); } diff --git a/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java index 36e17de80..df41dc642 100644 --- a/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java @@ -28,18 +28,21 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.Iterator; -import java.util.Arrays; +import java.io.File; +import java.util.*; /** * @@ -321,4 +324,62 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { } assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES); } + + // ----------------------------------------------------------------------------------------------- + // + // Test getOverlapping + // + // ----------------------------------------------------------------------------------------------- + + @DataProvider(name = "GetOverlapping") + public Object[][] makeGetOverlappingTest() throws Exception { + final GenomeLocParser genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference))); + + List tests = new ArrayList(); + + final GenomeLoc prev1 = genomeLocParser.createGenomeLoc("19", 1, 10); + final GenomeLoc prev2 = genomeLocParser.createGenomeLoc("19", 20, 50); + final GenomeLoc post1 = genomeLocParser.createGenomeLoc("21", 1, 10); + final GenomeLoc post2 = genomeLocParser.createGenomeLoc("21", 20, 50); + + final int chr20Length = genomeLocParser.getContigs().getSequence("20").getSequenceLength(); + for ( final int regionStart : Arrays.asList(1, 10, chr20Length - 10, chr20Length) ) { + for ( final int regionSize : Arrays.asList(1, 10, 100) ) { + final GenomeLoc region = genomeLocParser.createGenomeLocOnContig("20", regionStart, regionStart + regionSize); + final GenomeLoc spanning = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, region.getStop() + 10); + final GenomeLoc before_into = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, regionStart + 1); + final GenomeLoc middle = genomeLocParser.createGenomeLocOnContig("20", regionStart + 1, regionStart + 2); + final GenomeLoc middle_past = genomeLocParser.createGenomeLocOnContig("20", region.getStop()-1, region.getStop()+10); + + final List potentials = new LinkedList(); + potentials.add(region); + if ( spanning != null ) potentials.add(spanning); + if ( before_into != null ) potentials.add(before_into); + if ( middle != null ) potentials.add(middle); + if ( middle_past != null ) potentials.add(middle_past); + + for ( final int n : Arrays.asList(1, 2, 3) ) { + for ( final List regions : Utils.makePermutations(potentials, n, false) ) { + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, regions), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1)), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2)), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1)), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1, post2)), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, post1)), region}); + tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2, post1, post2)), region}); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "GetOverlapping") + public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) { + final List expectedOverlapping = intervals.getOverlappingFullSearch(region); + final List actualOverlapping = intervals.getOverlapping(region); + Assert.assertEquals(actualOverlapping, expectedOverlapping); + Assert.assertEquals(intervals.overlaps(region), ! expectedOverlapping.isEmpty(), "GenomeLocSortedSet.overlaps didn't return expected result"); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java index d2ea5d11b..6ab429015 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActiveRegionUnitTest.java @@ -34,6 +34,7 @@ import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -48,6 +49,7 @@ import java.util.*; public class ActiveRegionUnitTest extends BaseTest { + private final static boolean DEBUG = true; private GenomeLocParser genomeLocParser; private IndexedFastaSequenceFile seq; private String contig; @@ -88,7 +90,7 @@ public class ActiveRegionUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(enabled = true, dataProvider = "ActionRegionCreationTest") + @Test(enabled = !DEBUG, dataProvider = "ActionRegionCreationTest") public void testCreatingActiveRegions(final GenomeLoc loc, final List supportingStates, final boolean isActive, final int extension) { final ActiveRegion region = new ActiveRegion(loc, supportingStates, isActive, genomeLocParser, extension); Assert.assertEquals(region.getLocation(), loc); @@ -141,7 +143,7 @@ public class ActiveRegionUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "ActiveRegionReads") + @Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads") public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) { final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read)); @@ -197,6 +199,12 @@ public class ActiveRegionUnitTest extends BaseTest { Assert.assertTrue(region.getReads().get(0).getAlignmentEnd() <= region.getExtendedLoc().getStop()); } + // ----------------------------------------------------------------------------------------------- + // + // Make sure bad inputs are properly detected + // + // ----------------------------------------------------------------------------------------------- + @DataProvider(name = "BadReadsTest") public Object[][] makeBadReadsTest() { List tests = new ArrayList(); @@ -213,11 +221,100 @@ public class ActiveRegionUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class) + @Test(enabled = !DEBUG, dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class) public void testBadReads(final GATKSAMRecord read1, final GATKSAMRecord read2) { final GenomeLoc loc = genomeLocParser.createGenomeLoc(read1); final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0); region.add(read1); region.add(read2); } + + // ----------------------------------------------------------------------------------------------- + // + // Make sure we can properly cut up an active region based on engine intervals + // + // ----------------------------------------------------------------------------------------------- + + @DataProvider(name = "SplitActiveRegion") + public Object[][] makeSplitActiveRegion() { + List tests = new ArrayList(); + + final GenomeLoc whole_span = genomeLocParser.createGenomeLoc("20", 1, 500); + final GenomeLoc gl_before = genomeLocParser.createGenomeLoc("20", 1, 9); + final GenomeLoc gl_after = genomeLocParser.createGenomeLoc("20", 250, 500); + final GenomeLoc gl_diff_contig = genomeLocParser.createGenomeLoc("19", 40, 50); + + final int regionStart = 10; + final int regionStop = 100; + final GenomeLoc region = genomeLocParser.createGenomeLoc("20", regionStart, regionStop); + + for ( final GenomeLoc noEffect : Arrays.asList(whole_span) ) + tests.add(new Object[]{ + region, + Arrays.asList(noEffect), + Arrays.asList(region)}); + + for ( final GenomeLoc noOverlap : Arrays.asList(gl_before, gl_after, gl_diff_contig) ) + tests.add(new Object[]{ + region, + Arrays.asList(noOverlap), + Arrays.asList()}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 5, 50)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 50))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, 200)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, regionStop))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 70, 200)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 70, regionStop))}); + + tests.add(new Object[]{region, + Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, 200)), + Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, regionStop))}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SplitActiveRegion") + public void testSplitActiveRegion(final GenomeLoc regionLoc, final List intervalLocs, final List expectedRegionLocs) { + for ( final boolean addSubstates : Arrays.asList(true, false) ) { + final List states; + if ( addSubstates ) { + states = new LinkedList(); + for ( int i = 0; i < regionLoc.size(); i++ ) + states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(regionLoc.getContig(), regionLoc.getStart() + i), 1.0)); + } else { + states = null; + } + + final ActiveRegion region = new ActiveRegion(regionLoc, states, true, genomeLocParser, 0); + final GenomeLocSortedSet intervals = new GenomeLocSortedSet(genomeLocParser, intervalLocs); + final List regions = region.splitAndTrimToIntervals(intervals); + + Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations"); + for ( int i = 0; i < expectedRegionLocs.size(); i++ ) { + final GenomeLoc expected = expectedRegionLocs.get(i); + final ActiveRegion actual = regions.get(i); + Assert.assertEquals(actual.getLocation(), expected, "Bad region after split"); + Assert.assertEquals(actual.isActive(), region.isActive()); + Assert.assertEquals(actual.getExtension(), region.getExtension()); + } + } + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index b9fdb3afe..9be250b8e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -89,7 +89,7 @@ public class ActivityProfileUnitTest extends BaseTest { case Base: return new ActivityProfile(genomeLocParser); case BandPass: // zero size => equivalent to ActivityProfile - return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false); + return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false); default: throw new IllegalStateException(type.toString()); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index 787db9a0f..d5231c30b 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -87,7 +87,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size"); @@ -142,7 +142,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test( enabled = ! DEBUG, dataProvider = "BandPassComposition") public void testBandPassComposition(final int bandPassSize, final int integrationLength) { final int start = 1; - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA); final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; // add a buffer so that we can get all of the band pass values @@ -215,7 +215,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test( enabled = ! DEBUG, dataProvider = "KernelCreation") public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true); final double[] kernel = profile.getKernel(); Assert.assertEquals(kernel.length, expectedKernel.length); @@ -255,8 +255,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest { final Pair reader = GATKVCFUtils.readAllVCs(file, codec); final List incRegions = new ArrayList(); - final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser); - final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser); + final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null); + final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null); int pos = start; for ( final VariantContext vc : reader.getSecond() ) { if ( vc == null ) continue;