diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 74ff77e4b..3712a8e51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; @@ -64,7 +64,7 @@ import java.io.PrintStream; @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) -@ActiveRegionExtension(extension = 0, maxRegion = 50000) +@ActiveRegionTraversalParameters(extension = 0, maxRegion = 50000) public class FindCoveredIntervals extends ActiveRegionWalker { @Output(required = true) private PrintStream out; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 9bb04421c..a3d764141 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -131,7 +131,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) -@ActiveRegionExtension(extension=65, maxRegion=300) +@ActiveRegionTraversalParameters(extension=65, maxRegion=300) //@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 52ac783a9..bff696f13 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -75,6 +75,7 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); @@ -102,9 +103,10 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, T sum, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { if ( ! walkerHasPresetRegions ) { // We don't have preset regions, so we get our regions from the activity profile - final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); + final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile); workQueue.addAll(activeRegions); if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java similarity index 54% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java index 72c409f62..cdb45db7b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java @@ -33,7 +33,8 @@ import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; /** - * Describes the size of the buffer region that is added to each active region when pulling in covered reads. + * Describes the parameters that this walker requires of the active region traversal + * * User: rpoplin * Date: 1/18/12 */ @@ -41,13 +42,40 @@ import java.lang.annotation.RetentionPolicy; @Inherited @Retention(RetentionPolicy.RUNTIME) -public @interface ActiveRegionExtension { +public @interface ActiveRegionTraversalParameters { + /** + * How far to either side of the active region itself should we include reads? + * + * That is, if the active region is 10 bp wide, and extension is 5, ART will provide + * the walker with active regions 10 bp, with 5 bp of extension on either side, and + * all reads that cover the 20 bp of the region + extension. + * + * @return the size of the active region extension we'd like + */ public int extension() default 0; + + /** + * The minimum number of bp for an active region, when we need to chop it up into pieces because + * it's become too big. This only comes into effect when there's literally no good place to chop + * that does make the region smaller than this value. + * + * @return the min size in bp of regions + */ + public int minRegion() default 50; + + /** + * The maximum size in bp of active regions wanted by this walker + * + * Active regions larger than this value are automatically cut up by ART into smaller + * regions of size <= this value. + * + * @return the max size in bp of regions + */ public int maxRegion() default 1500; /** - * The sigma value for the Gaussian kernel of the band pass filter - * @return + * The variance value for the Gaussian kernel of the band pass filter employed by ART + * @return the breadth of the band pass gaussian kernel we want for our traversal */ public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 92504e3ba..e14e50b1a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -55,7 +55,7 @@ import java.util.*; @By(DataSource.READS) @Requires({DataSource.READS, DataSource.REFERENCE}) @PartitionBy(PartitionType.READ) -@ActiveRegionExtension(extension=50,maxRegion=1500) +@ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { @@ -160,7 +160,7 @@ public abstract class ActiveRegionWalker extends Walker allIntervals = new ArrayList(); for( final GenomeLoc interval : intervals.toList() ) { final int start = Math.max( 1, interval.getStart() - activeRegionExtension ); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 16cb2fd84..f265f9d60 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -41,7 +41,7 @@ import java.util.*; */ public class ActivityProfile { private final static int MAX_PROB_PROPOGATION_DISTANCE = 50; - private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author + protected final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author protected final List stateList; protected final GenomeLocParser parser; @@ -197,7 +197,6 @@ public class ActivityProfile { regionStopLoc = loc; contigLength = parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); } else { - // TODO -- need to figure out where to add loc as the regions will be popping off the front if ( regionStopLoc.getStart() != loc.getStart() - 1 ) throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); regionStopLoc = loc; @@ -294,6 +293,7 @@ public class ActivityProfile { * No returned region will be larger than maxRegionSize. * * @param activeRegionExtension the extension value to provide to the constructed regions + * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. Used to close out the active region when we've hit some kind of end (such @@ -301,14 +301,15 @@ public class ActivityProfile { * @return a non-null list of active regions */ @Ensures("result != null") - public List popReadyActiveRegions(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + public List popReadyActiveRegions(final int activeRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { if ( activeRegionExtension < 0 ) throw new IllegalArgumentException("activeRegionExtension must be >= 0 but got " + activeRegionExtension); + if ( minRegionSize < 1 ) throw new IllegalArgumentException("minRegionSize must be >= 1 but got " + minRegionSize); if ( maxRegionSize < 1 ) throw new IllegalArgumentException("maxRegionSize must be >= 1 but got " + maxRegionSize); final LinkedList regions = new LinkedList(); while ( true ) { - final ActiveRegion nextRegion = popNextReadyActiveRegion(activeRegionExtension, maxRegionSize, forceConversion); + final ActiveRegion nextRegion = popNextReadyActiveRegion(activeRegionExtension, minRegionSize, maxRegionSize, forceConversion); if ( nextRegion == null ) return regions; else { @@ -325,19 +326,20 @@ public class ActivityProfile { * are also updated. * * @param activeRegionExtension the extension value to provide to the constructed regions + * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. Used to close out the active region when we've hit some kind of end (such * as the end of the contig) * @return a fully formed active region, or null if none can be made */ - private ActiveRegion popNextReadyActiveRegion(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + private ActiveRegion popNextReadyActiveRegion(final int activeRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { if ( stateList.isEmpty() ) return null; final ActivityProfileState first = stateList.get(0); final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD; - final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, maxRegionSize, forceConversion); + final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion); if ( offsetOfNextRegionEnd == -1 ) // couldn't find a valid ending offset, so we return null return null; @@ -363,9 +365,14 @@ public class ActivityProfile { * The current region is defined from the start of the stateList, looking for elements that have the same isActiveRegion * flag (i.e., if isActiveRegion is true we are looking for states with isActiveProb > threshold, or alternatively * for states < threshold). The maximize size of the returned region is maxRegionSize. If forceConversion is - * true, then we'll return the region end even if this isn't safely beyond the max prob propogation distance. + * true, then we'll return the region end even if this isn't safely beyond the max prob propagation distance. + * + * Note that if isActiveRegion is true, and we can construct a active region > maxRegionSize in bp, we + * find the further local minimum within that max region, and cut the region there, under the constraint + * that the resulting region must be at least minRegionSize in bp. * * @param isActiveRegion is the region we're looking for an active region or inactive region? + * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. Used to close out the active region when we've hit some kind of end (such @@ -376,16 +383,65 @@ public class ActivityProfile { "result >= -1", "result == -1 || result < maxRegionSize", "! (result == -1 && forceConversion)"}) - private int findEndOfRegion(final boolean isActiveRegion, final int maxRegionSize, final boolean forceConversion) { - int i = 0; - while ( i < stateList.size() && i < maxRegionSize ) { - if ( stateList.get(i).isActiveProb > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { + private int findEndOfRegion(final boolean isActiveRegion, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { + final int nStates = stateList.size(); + int endOfActiveRegion = 0; + while ( endOfActiveRegion < nStates && endOfActiveRegion < maxRegionSize ) { + if ( getProb(endOfActiveRegion) > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { break; } - i++; + endOfActiveRegion++; + } + + if ( isActiveRegion && endOfActiveRegion == maxRegionSize ) { + // we've run to the end of the region, let's find a good place to cut + int minI = endOfActiveRegion - 1; + double minP = Double.MAX_VALUE; + for ( int i = minI; i >= minRegionSize - 1; i-- ) { + double cur = getProb(i); + if ( cur < minP && isMinimum(i) ) { + minP = cur; + minI = i; + } + } + + endOfActiveRegion = minI + 1; } // we're one past the end, so i must be decremented - return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1; + return forceConversion || endOfActiveRegion + getMaxProbPropagationDistance() < stateList.size() ? endOfActiveRegion - 1 : -1; + } + + /** + * Helper function to get the probability of the state at offset index + * @param index a valid offset into the state list + * @return the isActiveProb of the state at index + */ + @Requires({"index >= 0", "index < stateList.size()"}) + private double getProb(final int index) { + return stateList.get(index).isActiveProb; + } + + /** + * Is the probability at index in a local minimum? + * + * Checks that the probability at index is <= both the probabilities to either side. + * Returns false if index is at the end or the start of the state list. + * + * @param index the index of the state we want to test + * @return true if prob at state is a minimum, false otherwise + */ + @Requires({"index >= 0", "index < stateList.size()"}) + private boolean isMinimum(final int index) { + if ( index == stateList.size() - 1 ) + // we cannot be at a minimum if the current position is the last in the state list + return false; + else if ( index < 1 ) + // we cannot be at a minimum if the current position is the first or second + return false; + else { + final double indexP = getProb(index); + return indexP <= getProb(index+1) && indexP < getProb(index-1); + } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index 272596be3..5bba7db17 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -59,7 +59,10 @@ public class ActivityProfileState { /** * Create a new ActivityProfileState at loc with probability of being active of isActiveProb that maintains some - * information about the result state and value (TODO RYAN -- what do these mean?) + * information about the result state and value + * + * The only state value in use is HIGH_QUALITY_SOFT_CLIPS, and here the value is interpreted as the number + * of bp affected by the soft clips. * * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index bce1722cd..f6246b137 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -33,6 +33,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; @@ -46,6 +47,7 @@ import java.util.*; public class ActivityProfileUnitTest extends BaseTest { + private final static boolean DEBUG = false; private GenomeLocParser genomeLocParser; private GenomeLoc startLoc; @@ -123,7 +125,7 @@ public class ActivityProfileUnitTest extends BaseTest { return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); } - @Test(dataProvider = "BasicActivityProfileTestProvider") + @Test(enabled = ! DEBUG, dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { ActivityProfile profile = cfg.makeProfile(); @@ -226,7 +228,7 @@ public class ActivityProfileUnitTest extends BaseTest { } - @Test(enabled = true, dataProvider = "RegionCreationTests") + @Test(enabled = !DEBUG, dataProvider = "RegionCreationTests") public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { final ActivityProfile profile = new ActivityProfile(genomeLocParser); Assert.assertNotNull(profile.toString()); @@ -242,13 +244,13 @@ public class ActivityProfileUnitTest extends BaseTest { Assert.assertNotNull(profile.toString()); if ( ! waitUntilEnd ) { - final List regions = profile.popReadyActiveRegions(0, maxRegionSize, false); + final List regions = profile.popReadyActiveRegions(0, 1, maxRegionSize, false); lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); } } if ( waitUntilEnd || forceConversion ) { - final List regions = profile.popReadyActiveRegions(0, maxRegionSize, forceConversion); + final List regions = profile.popReadyActiveRegions(0, 1, maxRegionSize, forceConversion); lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); } @@ -312,7 +314,7 @@ public class ActivityProfileUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "SoftClipsTest") + @Test(enabled = ! DEBUG, dataProvider = "SoftClipsTest") public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { final ActivityProfile profile = new ActivityProfile(genomeLocParser); @@ -327,14 +329,15 @@ public class ActivityProfileUnitTest extends BaseTest { final GenomeLoc softClipLoc = genomeLocParser.createGenomeLoc(contig, nPrecedingSites + start); profile.add(new ActivityProfileState(softClipLoc, 1.0, ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS, softClipSize)); + final int actualNumOfSoftClips = Math.min(softClipSize, profile.getMaxProbPropagationDistance()); if ( nPrecedingSites == 0 ) { - final int profileSize = Math.min(start + softClipSize, contigLength) - start + 1; + final int profileSize = Math.min(start + actualNumOfSoftClips, contigLength) - start + 1; Assert.assertEquals(profile.size(), profileSize, "Wrong number of states in the profile"); } for ( int i = 0; i < profile.size(); i++ ) { final ActivityProfileState state = profile.getStateList().get(i); - final boolean withinSCRange = state.getLoc().distance(softClipLoc) <= softClipSize; + final boolean withinSCRange = state.getLoc().distance(softClipLoc) <= actualNumOfSoftClips; if ( withinSCRange ) { Assert.assertTrue(state.isActiveProb > 0.0, "active prob should be changed within soft clip size"); } else { @@ -342,4 +345,157 @@ public class ActivityProfileUnitTest extends BaseTest { } } } + + // ------------------------------------------------------------------------------------- + // + // Tests to ensure we cut large active regions in the right place + // + // ------------------------------------------------------------------------------------- + + private void addProb(final List l, final double v) { + l.add(v); + } + + @DataProvider(name = "ActiveRegionCutTests") + public Object[][] makeActiveRegionCutTests() { + final List tests = new LinkedList(); + +// for ( final int activeRegionSize : Arrays.asList(30) ) { +// for ( final int minRegionSize : Arrays.asList(5) ) { + for ( final int activeRegionSize : Arrays.asList(10, 12, 20, 30, 40) ) { + for ( final int minRegionSize : Arrays.asList(1, 5, 10) ) { + final int maxRegionSize = activeRegionSize * 2 / 3; + if ( minRegionSize >= maxRegionSize ) continue; + { // test flat activity profile + final List probs = Collections.nCopies(activeRegionSize, 1.0); + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test point profile is properly handled + for ( int end = 1; end < activeRegionSize; end++ ) { + final List probs = Collections.nCopies(end, 1.0); + tests.add(new Object[]{minRegionSize, maxRegionSize, Math.min(end, maxRegionSize), probs}); + } + } + + { // test increasing activity profile + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, (1.0*(i+1))/ activeRegionSize); + } + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test decreasing activity profile + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, 1 - (1.0*(i+1))/ activeRegionSize); + } + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test two peaks +// for ( final double rootSigma : Arrays.asList(2.0) ) { +// int maxPeak1 = 9; { +// int maxPeak2 = 16; { + for ( final double rootSigma : Arrays.asList(1.0, 2.0, 3.0) ) { + for ( int maxPeak1 = 0; maxPeak1 < activeRegionSize / 2; maxPeak1++ ) { + for ( int maxPeak2 = activeRegionSize / 2 + 1; maxPeak2 < activeRegionSize; maxPeak2++ ) { + final double[] gauss1 = makeGaussian(maxPeak1, activeRegionSize, rootSigma); + final double[] gauss2 = makeGaussian(maxPeak2, activeRegionSize, rootSigma+1); + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, gauss1[i] + gauss2[i]); + } + final int cutSite = findCutSiteForTwoMaxPeaks(probs, minRegionSize); + if ( cutSite != -1 && cutSite < maxRegionSize ) + tests.add(new Object[]{minRegionSize, maxRegionSize, Math.max(cutSite, minRegionSize), probs}); + } + } + } + } + + { // test that the lowest of two minima is taken + // looks like a bunch of 1s, 0.5, some 1.0s, 0.75, some more 1s +// int firstMin = 0; { +// int secondMin = 4; { + for ( int firstMin = 1; firstMin < activeRegionSize; firstMin++ ) { + for ( int secondMin = firstMin + 1; secondMin < activeRegionSize; secondMin++ ) { + final List probs = new ArrayList(Collections.nCopies(activeRegionSize, 1.0)); + probs.set(firstMin, 0.5); + probs.set(secondMin, 0.75); + final int expectedCut; + if ( firstMin + 1 < minRegionSize ) { + if ( firstMin == secondMin - 1 ) // edge case for non-min at minRegionSize + expectedCut = maxRegionSize; + else + expectedCut = secondMin + 1 > maxRegionSize ? maxRegionSize : ( secondMin + 1 < minRegionSize ? maxRegionSize : secondMin + 1); + } else if ( firstMin + 1 > maxRegionSize ) + expectedCut = maxRegionSize; + else { + expectedCut = firstMin + 1; + } + + Math.min(firstMin + 1, maxRegionSize); + tests.add(new Object[]{minRegionSize, maxRegionSize, expectedCut, probs}); + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private double[] makeGaussian(final int mean, final int range, final double sigma) { + final double[] gauss = new double[range]; + for( int iii = 0; iii < range; iii++ ) { + gauss[iii] = MathUtils.NormalDistribution(mean, sigma, iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD; + } + return gauss; + } + + private int findCutSiteForTwoMaxPeaks(final List probs, final int minRegionSize) { + for ( int i = probs.size() - 2; i > minRegionSize; i-- ) { + double prev = probs.get(i - 1); + double next = probs.get(i + 1); + double cur = probs.get(i); + if ( cur < next && cur < prev ) + return i + 1; + } + + return -1; + } + + +// private int findCutSite(final List probs) { +// for ( int i = probs.size() - 2; i > 0; i-- ) { +// double prev = probs.get(i + 1); +// double next = probs.get(i-1); +// double cur = probs.get(i); +// if ( cur < next && cur < prev ) +// return i + 1; +// } +// +// return -1; +// } + + @Test(dataProvider = "ActiveRegionCutTests") + public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List probs) { + final ActivityProfile profile = new ActivityProfile(genomeLocParser); + + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + for ( int i = 0; i <= maxRegionSize + profile.getMaxProbPropagationDistance(); i++ ) { + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + 1); + final double prob = i < probs.size() ? probs.get(i) : 0.0; + final ActivityProfileState state = new ActivityProfileState(loc, prob); + profile.add(state); + } + + final List regions = profile.popReadyActiveRegions(0, minRegionSize, maxRegionSize, false); + Assert.assertTrue(regions.size() >= 1, "Should only be one regions for this test"); + final ActiveRegion region = regions.get(0); + Assert.assertEquals(region.getLocation().getStart(), 1, "Region should start at 1"); + Assert.assertEquals(region.getLocation().size(), expectedRegionSize, "Incorrect region size; cut must have been incorrect"); + } } \ No newline at end of file