Making band pass filter size, sigma, active region max size and extension all accessible from the command line
This commit is contained in:
parent
cd91e365f4
commit
9e43a2028d
|
|
@ -102,11 +102,14 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
"non-primary reads, an inconsistent state. Please modify the walker");
|
||||
}
|
||||
|
||||
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
ActiveRegionExtension annotation = walker.getClass().getAnnotation(ActiveRegionExtension.class);
|
||||
this.activeRegionExtension = this.walker.activeRegionExtension == null ? annotation.extension() : this.walker.activeRegionExtension;
|
||||
this.maxRegionSize = this.walker.activeRegionMaxSize == null ? annotation.maxRegion() : this.walker.activeRegionMaxSize;
|
||||
final int bandPassFilterSize = this.walker.bandPassFilterSize == null ? annotation.bandPassFilterSize() : this.walker.bandPassFilterSize;
|
||||
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
|
||||
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
|
||||
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), bandPassFilterSize, bandPassSigma);
|
||||
if ( walkerHasPresetRegions ) {
|
||||
// we load all of the preset locations into the
|
||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile;
|
||||
|
||||
import java.lang.annotation.Documented;
|
||||
import java.lang.annotation.Inherited;
|
||||
import java.lang.annotation.Retention;
|
||||
|
|
@ -42,4 +44,18 @@ import java.lang.annotation.RetentionPolicy;
|
|||
public @interface ActiveRegionExtension {
|
||||
public int extension() default 0;
|
||||
public int maxRegion() default 1500;
|
||||
|
||||
/**
|
||||
* The size of the band pass filter in bp. The filter size describes how far
|
||||
* from the current site the band pass extends. So a value of 1 implies a total
|
||||
* band size of 3 bp, the site bp and one on each side
|
||||
* @return
|
||||
*/
|
||||
public int bandPassFilterSize() default BandPassActivityProfile.DEFAULT_FILTER_SIZE;
|
||||
|
||||
/**
|
||||
* The sigma value for the Gaussian kernel of the band pass filter
|
||||
* @return
|
||||
*/
|
||||
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,9 +28,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
|
|
@ -86,6 +84,22 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
|
||||
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="activeRegionExtension", shortName="activeRegionExtension", doc="The active region extension; if not provided defaults to Walker annotated default", required = false)
|
||||
public Integer activeRegionExtension = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="activeRegionMaxSize", shortName="activeRegionMaxSize", doc="The active region maximum size; if not provided defaults to Walker annotated default", required = false)
|
||||
public Integer activeRegionMaxSize = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="bandPassFilterSize", shortName="bandPassFilterSize", doc="The filter size of band pass filter; if not provided defaults to Walker annotated default", required = false)
|
||||
public Integer bandPassFilterSize = null;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
||||
public Double bandPassSigma = null;
|
||||
|
||||
private GenomeLocSortedSet presetActiveRegions = null;
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -651,7 +651,7 @@ public class MathUtils {
|
|||
|
||||
final double sum = sum(array);
|
||||
final double[] normalized = new double[array.length];
|
||||
if ( sum < 0.0 || sum > 1.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
|
||||
if ( sum < 0.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
|
||||
for ( int i = 0; i < array.length; i++ ) {
|
||||
normalized[i] = array[i] / sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,8 +44,10 @@ import java.util.LinkedList;
|
|||
*/
|
||||
public class BandPassActivityProfile extends ActivityProfile {
|
||||
public static final int DEFAULT_FILTER_SIZE = 80;
|
||||
public static final double DEFAULT_SIGMA = 55.0;
|
||||
|
||||
private final int filterSize;
|
||||
private final double sigma;
|
||||
private final double[] GaussianKernel;
|
||||
|
||||
/**
|
||||
|
|
@ -53,7 +55,7 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
* @param parser our genome loc parser
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser) {
|
||||
this(parser, DEFAULT_FILTER_SIZE);
|
||||
this(parser, DEFAULT_FILTER_SIZE, DEFAULT_SIGMA);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -63,16 +65,18 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
* side that are included in the band. So a filter size of 1 implies that the actual band
|
||||
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) {
|
||||
super(parser);
|
||||
|
||||
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
|
||||
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
|
||||
|
||||
// setup the Gaussian kernel for the band pass filter
|
||||
this.filterSize = filterSize;
|
||||
this.sigma = sigma;
|
||||
final double[] kernel = new double[getBandSize()];
|
||||
for( int iii = 0; iii < 2* filterSize + 1; iii++ ) {
|
||||
kernel[iii] = MathUtils.NormalDistribution(filterSize, 55.0, iii);
|
||||
kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii);
|
||||
}
|
||||
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
|
||||
}
|
||||
|
|
@ -108,6 +112,15 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
return filterSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Gaussian kernel sigma value
|
||||
* @return a positive double
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public double getSigma() {
|
||||
return sigma;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the kernel of this band pass filter. Do not modify returned result
|
||||
* @return the kernel used in this band pass filter
|
||||
|
|
|
|||
|
|
@ -64,11 +64,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
for ( boolean precedingIsActive : Arrays.asList(true, false) ) {
|
||||
for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) {
|
||||
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) {
|
||||
for ( double sigma : Arrays.asList(1.0, 2.0, BandPassActivityProfile.DEFAULT_SIGMA) ) {
|
||||
// for ( int start : Arrays.asList(10) ) {
|
||||
// for ( boolean precedingIsActive : Arrays.asList(false) ) {
|
||||
// for ( int precedingSites: Arrays.asList(0) ) {
|
||||
// for ( int bandPassSize : Arrays.asList(1) ) {
|
||||
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize });
|
||||
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize, sigma });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -78,10 +80,12 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test(dataProvider = "BandPassBasicTest")
|
||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma);
|
||||
|
||||
final int expectedBandSize = bandPassSize * 2 + 1;
|
||||
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
|
||||
Assert.assertEquals(profile.getSigma(), sigma, "Wrong sigma");
|
||||
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
|
||||
|
||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||
|
|
@ -132,7 +136,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
@Test( dataProvider = "BandPassComposition")
|
||||
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
||||
final int start = 1;
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
||||
|
||||
// add a buffer so that we can get all of the band pass values
|
||||
|
|
|
|||
Loading…
Reference in New Issue