Making band pass filter size, sigma, active region max size and extension all accessible from the command line

This commit is contained in:
Mark DePristo 2013-01-23 14:50:14 -05:00
parent cd91e365f4
commit 9e43a2028d
6 changed files with 64 additions and 14 deletions

View File

@ -102,11 +102,14 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
"non-primary reads, an inconsistent state. Please modify the walker");
}
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
ActiveRegionExtension annotation = walker.getClass().getAnnotation(ActiveRegionExtension.class);
this.activeRegionExtension = this.walker.activeRegionExtension == null ? annotation.extension() : this.walker.activeRegionExtension;
this.maxRegionSize = this.walker.activeRegionMaxSize == null ? annotation.maxRegion() : this.walker.activeRegionMaxSize;
final int bandPassFilterSize = this.walker.bandPassFilterSize == null ? annotation.bandPassFilterSize() : this.walker.bandPassFilterSize;
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), bandPassFilterSize, bandPassSigma);
if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile;
import java.lang.annotation.Documented;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
@ -42,4 +44,18 @@ import java.lang.annotation.RetentionPolicy;
public @interface ActiveRegionExtension {
public int extension() default 0;
public int maxRegion() default 1500;
/**
* The size of the band pass filter in bp. The filter size describes how far
* from the current site the band pass extends. So a value of 1 implies a total
* band size of 3 bp, the site bp and one on each side
* @return
*/
public int bandPassFilterSize() default BandPassActivityProfile.DEFAULT_FILTER_SIZE;
/**
* The sigma value for the Gaussian kernel of the band pass filter
* @return
*/
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
}

View File

@ -28,9 +28,7 @@ package org.broadinstitute.sting.gatk.walkers;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
@ -86,6 +84,22 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
@Advanced
@Argument(fullName="activeRegionExtension", shortName="activeRegionExtension", doc="The active region extension; if not provided defaults to Walker annotated default", required = false)
public Integer activeRegionExtension = null;
@Advanced
@Argument(fullName="activeRegionMaxSize", shortName="activeRegionMaxSize", doc="The active region maximum size; if not provided defaults to Walker annotated default", required = false)
public Integer activeRegionMaxSize = null;
@Advanced
@Argument(fullName="bandPassFilterSize", shortName="bandPassFilterSize", doc="The filter size of band pass filter; if not provided defaults to Walker annotated default", required = false)
public Integer bandPassFilterSize = null;
@Advanced
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
public Double bandPassSigma = null;
private GenomeLocSortedSet presetActiveRegions = null;
@Override

View File

@ -651,7 +651,7 @@ public class MathUtils {
final double sum = sum(array);
final double[] normalized = new double[array.length];
if ( sum < 0.0 || sum > 1.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
if ( sum < 0.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
for ( int i = 0; i < array.length; i++ ) {
normalized[i] = array[i] / sum;
}

View File

@ -44,8 +44,10 @@ import java.util.LinkedList;
*/
public class BandPassActivityProfile extends ActivityProfile {
public static final int DEFAULT_FILTER_SIZE = 80;
public static final double DEFAULT_SIGMA = 55.0;
private final int filterSize;
private final double sigma;
private final double[] GaussianKernel;
/**
@ -53,7 +55,7 @@ public class BandPassActivityProfile extends ActivityProfile {
* @param parser our genome loc parser
*/
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, DEFAULT_FILTER_SIZE);
this(parser, DEFAULT_FILTER_SIZE, DEFAULT_SIGMA);
}
/**
@ -63,16 +65,18 @@ public class BandPassActivityProfile extends ActivityProfile {
* side that are included in the band. So a filter size of 1 implies that the actual band
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
*/
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) {
super(parser);
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
// setup the Gaussian kernel for the band pass filter
this.filterSize = filterSize;
this.sigma = sigma;
final double[] kernel = new double[getBandSize()];
for( int iii = 0; iii < 2* filterSize + 1; iii++ ) {
kernel[iii] = MathUtils.NormalDistribution(filterSize, 55.0, iii);
kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii);
}
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
}
@ -108,6 +112,15 @@ public class BandPassActivityProfile extends ActivityProfile {
return filterSize;
}
/**
* Get the Gaussian kernel sigma value
* @return a positive double
*/
@Ensures("result >= 0")
public double getSigma() {
return sigma;
}
/**
* Get the kernel of this band pass filter. Do not modify returned result
* @return the kernel used in this band pass filter

View File

@ -64,11 +64,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
for ( boolean precedingIsActive : Arrays.asList(true, false) ) {
for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) {
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) {
for ( double sigma : Arrays.asList(1.0, 2.0, BandPassActivityProfile.DEFAULT_SIGMA) ) {
// for ( int start : Arrays.asList(10) ) {
// for ( boolean precedingIsActive : Arrays.asList(false) ) {
// for ( int precedingSites: Arrays.asList(0) ) {
// for ( int bandPassSize : Arrays.asList(1) ) {
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize });
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize, sigma });
}
}
}
}
@ -78,10 +80,12 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
}
@Test(dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma);
final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
Assert.assertEquals(profile.getSigma(), sigma, "Wrong sigma");
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
@ -132,7 +136,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test( dataProvider = "BandPassComposition")
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
final int start = 1;
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
// add a buffer so that we can get all of the band pass values