From 0c94e3d96e0a6c1251bb814fbbd18e9a93511d57 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 15:41:56 -0500 Subject: [PATCH] Adaptively compute the band pass filter from the sigma, up to a maximum size of 50 bp -- Previously we allowed band pass filter size to be specified along with the sigma. But now that sigma is controllable from walkers and from the command line, we instead compute the filter size given the kernel from the sigma, including all kernel points with p > 1e-5 in the kernel. This means that if you use a smaller kernel you get a small band size and therefore faster ART -- Update, as discussed with Ryan, the sigma and band size to 17 bp for HC (default ART wide) and max band size of 50 bp --- .../traversals/TraverseActiveRegions.java | 3 +- .../gatk/walkers/ActiveRegionExtension.java | 8 --- .../gatk/walkers/ActiveRegionWalker.java | 4 -- .../activeregion/BandPassActivityProfile.java | 52 +++++++++++------- .../activeregion/ActivityProfileUnitTest.java | 4 +- .../BandPassActivityProfileUnitTest.java | 53 ++++++++++++++++++- 6 files changed, 87 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 21971e189..52ac783a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -105,11 +105,10 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker 5, etc. + * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma + * @param sigma the variance of the Gaussian kernel for this band pass filter */ - public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) { + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) { + this(parser, maxFilterSize, sigma, true); + } + + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { super(parser); - if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); // setup the Gaussian kernel for the band pass filter - this.filterSize = filterSize; this.sigma = sigma; - final double[] kernel = new double[getBandSize()]; - for( int iii = 0; iii < 2* filterSize + 1; iii++ ) { + final double[] fullKernel = makeKernel(maxFilterSize, sigma); + this.filterSize = adaptiveFilterSize ? determineFilterSize(fullKernel, MIN_PROB_TO_KEEP_IN_FILTER) : maxFilterSize; + this.GaussianKernel = makeKernel(this.filterSize, sigma); + } + + protected static int determineFilterSize(final double[] kernel, final double minProbToKeepInFilter) { + final int middle = (kernel.length - 1) / 2; + int filterEnd = middle; + while ( filterEnd > 0 ) { + if ( kernel[filterEnd - 1] < minProbToKeepInFilter ) { + break; + } + filterEnd--; + } + return middle - filterEnd; + } + + protected static double[] makeKernel(final int filterSize, final double sigma) { + final int bandSize = 2 * filterSize + 1; + final double[] kernel = new double[bandSize]; + for( int iii = 0; iii < bandSize; iii++ ) { kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii); } - this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); + return MathUtils.normalizeFromRealSpace(kernel); } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 311d43206..bce1722cd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -87,7 +87,7 @@ public class ActivityProfileUnitTest extends BaseTest { case Base: return new ActivityProfile(genomeLocParser); case BandPass: // zero size => equivalent to ActivityProfile - return new BandPassActivityProfile(genomeLocParser, 0); + return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false); default: throw new IllegalStateException(type.toString()); } } @@ -98,7 +98,7 @@ public class ActivityProfileUnitTest extends BaseTest { int start = regionStart.getStart() + startsAndStops[i]; int end = regionStart.getStart() + startsAndStops[i+1] - 1; GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new ActiveRegion(activeLoc, null, isActive, genomeLocParser, extension); + ActiveRegion r = new ActiveRegion(activeLoc, Collections.emptyList(), isActive, genomeLocParser, extension); l.add(r); isActive = ! isActive; } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index 0a71bad14..ff1c9bdef 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -81,7 +81,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size"); @@ -124,7 +124,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { public Object[][] makeBandPassComposition() { final List tests = new LinkedList(); - for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.MAX_FILTER_SIZE) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { tests.add(new Object[]{ bandPassSize, integrationLength }); } @@ -167,4 +167,53 @@ public class BandPassActivityProfileUnitTest extends BaseTest { Assert.assertEquals(profile.getStateList().get(j).isActiveProb, expectedProbs[j], "State probability not expected at " + j); } } + + // ------------------------------------------------------------------------------------ + // + // Code to test the creation of the kernels + // + // ------------------------------------------------------------------------------------ + + /** + + kernel <- function(sd, pThres) { + raw = dnorm(-80:81, mean=0, sd=sd) + norm = raw / sum(raw) + bad = norm < pThres + paste(norm[! bad], collapse=", ") + } + + print(kernel(0.01, 1e-5)) + print(kernel(1, 1e-5)) + print(kernel(5, 1e-5)) + print(kernel(17, 1e-5)) + + * @return + */ + + @DataProvider(name = "KernelCreation") + public Object[][] makeKernelCreation() { + final List tests = new LinkedList(); + + tests.add(new Object[]{ 0.01, 1000, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 1.0, 0, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1, new double[]{0.2740686, 0.4518628, 0.2740686}}); + tests.add(new Object[]{ 1.0, 2, new double[]{0.05448868, 0.24420134, 0.40261995, 0.24420134, 0.05448868}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 5.0, 1000, new double[]{1.1788613551308e-05, 2.67660451529771e-05, 5.83893851582921e-05, 0.000122380386022754, 0.000246443833694604, 0.000476817640292968, 0.000886369682387602, 0.00158309031659599, 0.00271659384673712, 0.00447890605896858, 0.00709491856924629, 0.0107981933026376, 0.0157900316601788, 0.0221841669358911, 0.029945493127149, 0.0388372109966426, 0.0483941449038287, 0.0579383105522965, 0.0666449205783599, 0.0736540280606647, 0.0782085387950912, 0.0797884560802865, 0.0782085387950912, 0.0736540280606647, 0.0666449205783599, 0.0579383105522965, 0.0483941449038287, 0.0388372109966426, 0.029945493127149, 0.0221841669358911, 0.0157900316601788, 0.0107981933026376, 0.00709491856924629, 0.00447890605896858, 0.00271659384673712, 0.00158309031659599, 0.000886369682387602, 0.000476817640292968, 0.000246443833694604, 0.000122380386022754, 5.83893851582921e-05, 2.67660451529771e-05, 1.1788613551308e-05}}); + tests.add(new Object[]{17.0, 1000, new double[]{1.25162575710745e-05, 1.57001772728555e-05, 1.96260034693739e-05, 2.44487374842009e-05, 3.03513668801384e-05, 3.75489089511911e-05, 4.62928204154855e-05, 5.68757597480354e-05, 6.96366758708924e-05, 8.49661819944029e-05, 0.000103312156275406, 0.000125185491708561, 0.000151165896477646, 0.000181907623161359, 0.000218144981137171, 0.000260697461819069, 0.000310474281706066, 0.000368478124457557, 0.000435807841336874, 0.00051365985048857, 0.000603327960854364, 0.000706201337376934, 0.000823760321812988, 0.000957569829285965, 0.00110927005589186, 0.00128056425833231, 0.00147320340358764, 0.00168896753568649, 0.00192964376796036, 0.00219700088266432, 0.00249276060490197, 0.00281856571330067, 0.00317594525418154, 0.00356627723683793, 0.00399074930220799, 0.00445031797242299, 0.00494566720070898, 0.00547716704583487, 0.00604483338842317, 0.00664828968356621, 0.00728673180099395, 0.00795889703644795, 0.00866303838230695, 0.00939690511889675, 0.0101577307281371, 0.010942229037054, 0.0117465993701676, 0.0125665413280325, 0.0133972796167302, 0.0142335991336574, 0.0150698902735454, 0.0159002041614507, 0.0167183172536454, 0.0175178044808441, 0.0182921198494897, 0.0190346831745763, 0.0197389714002676, 0.020398612780527, 0.0210074820484496, 0.0215597946062309, 0.0220501977225941, 0.022473856734247, 0.0228265343139947, 0.0231046609899767, 0.0233053952756892, 0.0234266719946158, 0.0234672376502799, 0.0234266719946158, 0.0233053952756892, 0.0231046609899767, 0.0228265343139947, 0.022473856734247, 0.0220501977225941, 0.0215597946062309, 0.0210074820484496, 0.020398612780527, 0.0197389714002676, 0.0190346831745763, 0.0182921198494897, 0.0175178044808441, 0.0167183172536454, 0.0159002041614507, 0.0150698902735454, 0.0142335991336574, 0.0133972796167302, 0.0125665413280325, 0.0117465993701676, 0.010942229037054, 0.0101577307281371, 0.00939690511889675, 0.00866303838230695, 0.00795889703644795, 0.00728673180099395, 0.00664828968356621, 0.00604483338842317, 0.00547716704583487, 0.00494566720070898, 0.00445031797242299, 0.00399074930220799, 0.00356627723683793, 0.00317594525418154, 0.00281856571330067, 0.00249276060490197, 0.00219700088266432, 0.00192964376796036, 0.00168896753568649, 0.00147320340358764, 0.00128056425833231, 0.00110927005589186, 0.000957569829285965, 0.000823760321812988, 0.000706201337376934, 0.000603327960854364, 0.00051365985048857, 0.000435807841336874, 0.000368478124457557, 0.000310474281706066, 0.000260697461819069, 0.000218144981137171, 0.000181907623161359, 0.000151165896477646, 0.000125185491708561, 0.000103312156275406, 8.49661819944029e-05, 6.96366758708924e-05, 5.68757597480354e-05, 4.62928204154855e-05, 3.75489089511911e-05, 3.03513668801384e-05, 2.44487374842009e-05, 1.96260034693739e-05, 1.57001772728555e-05, 1.25162575710745e-05}}); + + return tests.toArray(new Object[][]{}); + } + + @Test( dataProvider = "KernelCreation") + public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) { + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true); + + final double[] kernel = profile.getKernel(); + Assert.assertEquals(kernel.length, expectedKernel.length); + for ( int i = 0; i < kernel.length; i++ ) + Assert.assertEquals(kernel[i], expectedKernel[i], 1e-3, "Kernels not equal at " + i); + } }