Adaptively compute the band pass filter from the sigma, up to a maximum size of 50 bp

-- Previously we allowed band pass filter size to be specified along with the sigma.  But now that sigma is controllable from walkers and from the command line, we instead compute the filter size given the kernel from the sigma, including all kernel points with p > 1e-5 in the kernel.  This means that if you use a smaller kernel you get a small band size and therefore faster ART
-- Update, as discussed with Ryan, the sigma and band size to 17 bp for HC (default ART wide) and max band size of 50 bp
This commit is contained in:
Mark DePristo 2013-01-23 15:41:56 -05:00
parent 9e43a2028d
commit 0c94e3d96e
6 changed files with 87 additions and 37 deletions

View File

@ -105,11 +105,10 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
ActiveRegionExtension annotation = walker.getClass().getAnnotation(ActiveRegionExtension.class);
this.activeRegionExtension = this.walker.activeRegionExtension == null ? annotation.extension() : this.walker.activeRegionExtension;
this.maxRegionSize = this.walker.activeRegionMaxSize == null ? annotation.maxRegion() : this.walker.activeRegionMaxSize;
final int bandPassFilterSize = this.walker.bandPassFilterSize == null ? annotation.bandPassFilterSize() : this.walker.bandPassFilterSize;
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), bandPassFilterSize, bandPassSigma);
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {

View File

@ -45,14 +45,6 @@ public @interface ActiveRegionExtension {
public int extension() default 0;
public int maxRegion() default 1500;
/**
* The size of the band pass filter in bp. The filter size describes how far
* from the current site the band pass extends. So a value of 1 implies a total
* band size of 3 bp, the site bp and one on each side
* @return
*/
public int bandPassFilterSize() default BandPassActivityProfile.DEFAULT_FILTER_SIZE;
/**
* The sigma value for the Gaussian kernel of the band pass filter
* @return

View File

@ -92,10 +92,6 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
@Argument(fullName="activeRegionMaxSize", shortName="activeRegionMaxSize", doc="The active region maximum size; if not provided defaults to Walker annotated default", required = false)
public Integer activeRegionMaxSize = null;
@Advanced
@Argument(fullName="bandPassFilterSize", shortName="bandPassFilterSize", doc="The filter size of band pass filter; if not provided defaults to Walker annotated default", required = false)
public Integer bandPassFilterSize = null;
@Advanced
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
public Double bandPassSigma = null;

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
@ -43,42 +44,55 @@ import java.util.LinkedList;
* @since 2011
*/
public class BandPassActivityProfile extends ActivityProfile {
public static final int DEFAULT_FILTER_SIZE = 80;
public static final double DEFAULT_SIGMA = 55.0;
public static final int MAX_FILTER_SIZE = 50;
private final static double MIN_PROB_TO_KEEP_IN_FILTER = 1e-5;
public static final double DEFAULT_SIGMA = 17.0;
private final int filterSize;
private final double sigma;
private final double[] GaussianKernel;
/**
* Create a band pass activity profile with the default band size
* @param parser our genome loc parser
*/
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, DEFAULT_FILTER_SIZE, DEFAULT_SIGMA);
}
/**
* Create an activity profile that implements a band pass filter on the states
* @param parser our genome loc parser
* @param filterSize the size (in bp) of the band pass filter. The filter size is the number of bp to each
* side that are included in the band. So a filter size of 1 implies that the actual band
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
* @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
* @param sigma the variance of the Gaussian kernel for this band pass filter
*/
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) {
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) {
this(parser, maxFilterSize, sigma, true);
}
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
super(parser);
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
// setup the Gaussian kernel for the band pass filter
this.filterSize = filterSize;
this.sigma = sigma;
final double[] kernel = new double[getBandSize()];
for( int iii = 0; iii < 2* filterSize + 1; iii++ ) {
final double[] fullKernel = makeKernel(maxFilterSize, sigma);
this.filterSize = adaptiveFilterSize ? determineFilterSize(fullKernel, MIN_PROB_TO_KEEP_IN_FILTER) : maxFilterSize;
this.GaussianKernel = makeKernel(this.filterSize, sigma);
}
protected static int determineFilterSize(final double[] kernel, final double minProbToKeepInFilter) {
final int middle = (kernel.length - 1) / 2;
int filterEnd = middle;
while ( filterEnd > 0 ) {
if ( kernel[filterEnd - 1] < minProbToKeepInFilter ) {
break;
}
filterEnd--;
}
return middle - filterEnd;
}
protected static double[] makeKernel(final int filterSize, final double sigma) {
final int bandSize = 2 * filterSize + 1;
final double[] kernel = new double[bandSize];
for( int iii = 0; iii < bandSize; iii++ ) {
kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii);
}
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
return MathUtils.normalizeFromRealSpace(kernel);
}
/**

View File

@ -87,7 +87,7 @@ public class ActivityProfileUnitTest extends BaseTest {
case Base: return new ActivityProfile(genomeLocParser);
case BandPass:
// zero size => equivalent to ActivityProfile
return new BandPassActivityProfile(genomeLocParser, 0);
return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false);
default: throw new IllegalStateException(type.toString());
}
}
@ -98,7 +98,7 @@ public class ActivityProfileUnitTest extends BaseTest {
int start = regionStart.getStart() + startsAndStops[i];
int end = regionStart.getStart() + startsAndStops[i+1] - 1;
GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end);
ActiveRegion r = new ActiveRegion(activeLoc, null, isActive, genomeLocParser, extension);
ActiveRegion r = new ActiveRegion(activeLoc, Collections.<ActivityProfileState>emptyList(), isActive, genomeLocParser, extension);
l.add(r);
isActive = ! isActive;
}

View File

@ -81,7 +81,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false);
final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
@ -124,7 +124,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
public Object[][] makeBandPassComposition() {
final List<Object[]> tests = new LinkedList<Object[]>();
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) {
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.MAX_FILTER_SIZE) ) {
for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) {
tests.add(new Object[]{ bandPassSize, integrationLength });
}
@ -167,4 +167,53 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
Assert.assertEquals(profile.getStateList().get(j).isActiveProb, expectedProbs[j], "State probability not expected at " + j);
}
}
// ------------------------------------------------------------------------------------
//
// Code to test the creation of the kernels
//
// ------------------------------------------------------------------------------------
/**
kernel <- function(sd, pThres) {
raw = dnorm(-80:81, mean=0, sd=sd)
norm = raw / sum(raw)
bad = norm < pThres
paste(norm[! bad], collapse=", ")
}
print(kernel(0.01, 1e-5))
print(kernel(1, 1e-5))
print(kernel(5, 1e-5))
print(kernel(17, 1e-5))
* @return
*/
@DataProvider(name = "KernelCreation")
public Object[][] makeKernelCreation() {
final List<Object[]> tests = new LinkedList<Object[]>();
tests.add(new Object[]{ 0.01, 1000, new double[]{1.0}});
tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}});
tests.add(new Object[]{ 1.0, 0, new double[]{1.0}});
tests.add(new Object[]{ 1.0, 1, new double[]{0.2740686, 0.4518628, 0.2740686}});
tests.add(new Object[]{ 1.0, 2, new double[]{0.05448868, 0.24420134, 0.40261995, 0.24420134, 0.05448868}});
tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}});
tests.add(new Object[]{ 5.0, 1000, new double[]{1.1788613551308e-05, 2.67660451529771e-05, 5.83893851582921e-05, 0.000122380386022754, 0.000246443833694604, 0.000476817640292968, 0.000886369682387602, 0.00158309031659599, 0.00271659384673712, 0.00447890605896858, 0.00709491856924629, 0.0107981933026376, 0.0157900316601788, 0.0221841669358911, 0.029945493127149, 0.0388372109966426, 0.0483941449038287, 0.0579383105522965, 0.0666449205783599, 0.0736540280606647, 0.0782085387950912, 0.0797884560802865, 0.0782085387950912, 0.0736540280606647, 0.0666449205783599, 0.0579383105522965, 0.0483941449038287, 0.0388372109966426, 0.029945493127149, 0.0221841669358911, 0.0157900316601788, 0.0107981933026376, 0.00709491856924629, 0.00447890605896858, 0.00271659384673712, 0.00158309031659599, 0.000886369682387602, 0.000476817640292968, 0.000246443833694604, 0.000122380386022754, 5.83893851582921e-05, 2.67660451529771e-05, 1.1788613551308e-05}});
tests.add(new Object[]{17.0, 1000, new double[]{1.25162575710745e-05, 1.57001772728555e-05, 1.96260034693739e-05, 2.44487374842009e-05, 3.03513668801384e-05, 3.75489089511911e-05, 4.62928204154855e-05, 5.68757597480354e-05, 6.96366758708924e-05, 8.49661819944029e-05, 0.000103312156275406, 0.000125185491708561, 0.000151165896477646, 0.000181907623161359, 0.000218144981137171, 0.000260697461819069, 0.000310474281706066, 0.000368478124457557, 0.000435807841336874, 0.00051365985048857, 0.000603327960854364, 0.000706201337376934, 0.000823760321812988, 0.000957569829285965, 0.00110927005589186, 0.00128056425833231, 0.00147320340358764, 0.00168896753568649, 0.00192964376796036, 0.00219700088266432, 0.00249276060490197, 0.00281856571330067, 0.00317594525418154, 0.00356627723683793, 0.00399074930220799, 0.00445031797242299, 0.00494566720070898, 0.00547716704583487, 0.00604483338842317, 0.00664828968356621, 0.00728673180099395, 0.00795889703644795, 0.00866303838230695, 0.00939690511889675, 0.0101577307281371, 0.010942229037054, 0.0117465993701676, 0.0125665413280325, 0.0133972796167302, 0.0142335991336574, 0.0150698902735454, 0.0159002041614507, 0.0167183172536454, 0.0175178044808441, 0.0182921198494897, 0.0190346831745763, 0.0197389714002676, 0.020398612780527, 0.0210074820484496, 0.0215597946062309, 0.0220501977225941, 0.022473856734247, 0.0228265343139947, 0.0231046609899767, 0.0233053952756892, 0.0234266719946158, 0.0234672376502799, 0.0234266719946158, 0.0233053952756892, 0.0231046609899767, 0.0228265343139947, 0.022473856734247, 0.0220501977225941, 0.0215597946062309, 0.0210074820484496, 0.020398612780527, 0.0197389714002676, 0.0190346831745763, 0.0182921198494897, 0.0175178044808441, 0.0167183172536454, 0.0159002041614507, 0.0150698902735454, 0.0142335991336574, 0.0133972796167302, 0.0125665413280325, 0.0117465993701676, 0.010942229037054, 0.0101577307281371, 0.00939690511889675, 0.00866303838230695, 0.00795889703644795, 0.00728673180099395, 0.00664828968356621, 0.00604483338842317, 0.00547716704583487, 0.00494566720070898, 0.00445031797242299, 0.00399074930220799, 0.00356627723683793, 0.00317594525418154, 0.00281856571330067, 0.00249276060490197, 0.00219700088266432, 0.00192964376796036, 0.00168896753568649, 0.00147320340358764, 0.00128056425833231, 0.00110927005589186, 0.000957569829285965, 0.000823760321812988, 0.000706201337376934, 0.000603327960854364, 0.00051365985048857, 0.000435807841336874, 0.000368478124457557, 0.000310474281706066, 0.000260697461819069, 0.000218144981137171, 0.000181907623161359, 0.000151165896477646, 0.000125185491708561, 0.000103312156275406, 8.49661819944029e-05, 6.96366758708924e-05, 5.68757597480354e-05, 4.62928204154855e-05, 3.75489089511911e-05, 3.03513668801384e-05, 2.44487374842009e-05, 1.96260034693739e-05, 1.57001772728555e-05, 1.25162575710745e-05}});
return tests.toArray(new Object[][]{});
}
@Test( dataProvider = "KernelCreation")
public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true);
final double[] kernel = profile.getKernel();
Assert.assertEquals(kernel.length, expectedKernel.length);
for ( int i = 0; i < kernel.length; i++ )
Assert.assertEquals(kernel[i], expectedKernel[i], 1e-3, "Kernels not equal at " + i);
}
}