diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 21971e189..52ac783a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -105,11 +105,10 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker 5, etc. + * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma + * @param sigma the variance of the Gaussian kernel for this band pass filter */ - public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) { + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) { + this(parser, maxFilterSize, sigma, true); + } + + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { super(parser); - if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); // setup the Gaussian kernel for the band pass filter - this.filterSize = filterSize; this.sigma = sigma; - final double[] kernel = new double[getBandSize()]; - for( int iii = 0; iii < 2* filterSize + 1; iii++ ) { + final double[] fullKernel = makeKernel(maxFilterSize, sigma); + this.filterSize = adaptiveFilterSize ? determineFilterSize(fullKernel, MIN_PROB_TO_KEEP_IN_FILTER) : maxFilterSize; + this.GaussianKernel = makeKernel(this.filterSize, sigma); + } + + protected static int determineFilterSize(final double[] kernel, final double minProbToKeepInFilter) { + final int middle = (kernel.length - 1) / 2; + int filterEnd = middle; + while ( filterEnd > 0 ) { + if ( kernel[filterEnd - 1] < minProbToKeepInFilter ) { + break; + } + filterEnd--; + } + return middle - filterEnd; + } + + protected static double[] makeKernel(final int filterSize, final double sigma) { + final int bandSize = 2 * filterSize + 1; + final double[] kernel = new double[bandSize]; + for( int iii = 0; iii < bandSize; iii++ ) { kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii); } - this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); + return MathUtils.normalizeFromRealSpace(kernel); } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 311d43206..bce1722cd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -87,7 +87,7 @@ public class ActivityProfileUnitTest extends BaseTest { case Base: return new ActivityProfile(genomeLocParser); case BandPass: // zero size => equivalent to ActivityProfile - return new BandPassActivityProfile(genomeLocParser, 0); + return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false); default: throw new IllegalStateException(type.toString()); } } @@ -98,7 +98,7 @@ public class ActivityProfileUnitTest extends BaseTest { int start = regionStart.getStart() + startsAndStops[i]; int end = regionStart.getStart() + startsAndStops[i+1] - 1; GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new ActiveRegion(activeLoc, null, isActive, genomeLocParser, extension); + ActiveRegion r = new ActiveRegion(activeLoc, Collections.emptyList(), isActive, genomeLocParser, extension); l.add(r); isActive = ! isActive; } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index 0a71bad14..ff1c9bdef 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -81,7 +81,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size"); @@ -124,7 +124,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { public Object[][] makeBandPassComposition() { final List tests = new LinkedList(); - for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.MAX_FILTER_SIZE) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { tests.add(new Object[]{ bandPassSize, integrationLength }); } @@ -167,4 +167,53 @@ public class BandPassActivityProfileUnitTest extends BaseTest { Assert.assertEquals(profile.getStateList().get(j).isActiveProb, expectedProbs[j], "State probability not expected at " + j); } } + + // ------------------------------------------------------------------------------------ + // + // Code to test the creation of the kernels + // + // ------------------------------------------------------------------------------------ + + /** + + kernel <- function(sd, pThres) { + raw = dnorm(-80:81, mean=0, sd=sd) + norm = raw / sum(raw) + bad = norm < pThres + paste(norm[! bad], collapse=", ") + } + + print(kernel(0.01, 1e-5)) + print(kernel(1, 1e-5)) + print(kernel(5, 1e-5)) + print(kernel(17, 1e-5)) + + * @return + */ + + @DataProvider(name = "KernelCreation") + public Object[][] makeKernelCreation() { + final List tests = new LinkedList(); + + tests.add(new Object[]{ 0.01, 1000, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 1.0, 0, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1, new double[]{0.2740686, 0.4518628, 0.2740686}}); + tests.add(new Object[]{ 1.0, 2, new double[]{0.05448868, 0.24420134, 0.40261995, 0.24420134, 0.05448868}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 5.0, 1000, new double[]{1.1788613551308e-05, 2.67660451529771e-05, 5.83893851582921e-05, 0.000122380386022754, 0.000246443833694604, 0.000476817640292968, 0.000886369682387602, 0.00158309031659599, 0.00271659384673712, 0.00447890605896858, 0.00709491856924629, 0.0107981933026376, 0.0157900316601788, 0.0221841669358911, 0.029945493127149, 0.0388372109966426, 0.0483941449038287, 0.0579383105522965, 0.0666449205783599, 0.0736540280606647, 0.0782085387950912, 0.0797884560802865, 0.0782085387950912, 0.0736540280606647, 0.0666449205783599, 0.0579383105522965, 0.0483941449038287, 0.0388372109966426, 0.029945493127149, 0.0221841669358911, 0.0157900316601788, 0.0107981933026376, 0.00709491856924629, 0.00447890605896858, 0.00271659384673712, 0.00158309031659599, 0.000886369682387602, 0.000476817640292968, 0.000246443833694604, 0.000122380386022754, 5.83893851582921e-05, 2.67660451529771e-05, 1.1788613551308e-05}}); + tests.add(new Object[]{17.0, 1000, new double[]{1.25162575710745e-05, 1.57001772728555e-05, 1.96260034693739e-05, 2.44487374842009e-05, 3.03513668801384e-05, 3.75489089511911e-05, 4.62928204154855e-05, 5.68757597480354e-05, 6.96366758708924e-05, 8.49661819944029e-05, 0.000103312156275406, 0.000125185491708561, 0.000151165896477646, 0.000181907623161359, 0.000218144981137171, 0.000260697461819069, 0.000310474281706066, 0.000368478124457557, 0.000435807841336874, 0.00051365985048857, 0.000603327960854364, 0.000706201337376934, 0.000823760321812988, 0.000957569829285965, 0.00110927005589186, 0.00128056425833231, 0.00147320340358764, 0.00168896753568649, 0.00192964376796036, 0.00219700088266432, 0.00249276060490197, 0.00281856571330067, 0.00317594525418154, 0.00356627723683793, 0.00399074930220799, 0.00445031797242299, 0.00494566720070898, 0.00547716704583487, 0.00604483338842317, 0.00664828968356621, 0.00728673180099395, 0.00795889703644795, 0.00866303838230695, 0.00939690511889675, 0.0101577307281371, 0.010942229037054, 0.0117465993701676, 0.0125665413280325, 0.0133972796167302, 0.0142335991336574, 0.0150698902735454, 0.0159002041614507, 0.0167183172536454, 0.0175178044808441, 0.0182921198494897, 0.0190346831745763, 0.0197389714002676, 0.020398612780527, 0.0210074820484496, 0.0215597946062309, 0.0220501977225941, 0.022473856734247, 0.0228265343139947, 0.0231046609899767, 0.0233053952756892, 0.0234266719946158, 0.0234672376502799, 0.0234266719946158, 0.0233053952756892, 0.0231046609899767, 0.0228265343139947, 0.022473856734247, 0.0220501977225941, 0.0215597946062309, 0.0210074820484496, 0.020398612780527, 0.0197389714002676, 0.0190346831745763, 0.0182921198494897, 0.0175178044808441, 0.0167183172536454, 0.0159002041614507, 0.0150698902735454, 0.0142335991336574, 0.0133972796167302, 0.0125665413280325, 0.0117465993701676, 0.010942229037054, 0.0101577307281371, 0.00939690511889675, 0.00866303838230695, 0.00795889703644795, 0.00728673180099395, 0.00664828968356621, 0.00604483338842317, 0.00547716704583487, 0.00494566720070898, 0.00445031797242299, 0.00399074930220799, 0.00356627723683793, 0.00317594525418154, 0.00281856571330067, 0.00249276060490197, 0.00219700088266432, 0.00192964376796036, 0.00168896753568649, 0.00147320340358764, 0.00128056425833231, 0.00110927005589186, 0.000957569829285965, 0.000823760321812988, 0.000706201337376934, 0.000603327960854364, 0.00051365985048857, 0.000435807841336874, 0.000368478124457557, 0.000310474281706066, 0.000260697461819069, 0.000218144981137171, 0.000181907623161359, 0.000151165896477646, 0.000125185491708561, 0.000103312156275406, 8.49661819944029e-05, 6.96366758708924e-05, 5.68757597480354e-05, 4.62928204154855e-05, 3.75489089511911e-05, 3.03513668801384e-05, 2.44487374842009e-05, 1.96260034693739e-05, 1.57001772728555e-05, 1.25162575710745e-05}}); + + return tests.toArray(new Object[][]{}); + } + + @Test( dataProvider = "KernelCreation") + public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) { + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true); + + final double[] kernel = profile.getKernel(); + Assert.assertEquals(kernel.length, expectedKernel.length); + for ( int i = 0; i < kernel.length; i++ ) + Assert.assertEquals(kernel[i], expectedKernel[i], 1e-3, "Kernels not equal at " + i); + } }