Making band pass filter size, sigma, active region max size and extension all accessible from the command line
This commit is contained in:
parent
cd91e365f4
commit
9e43a2028d
|
|
@ -102,11 +102,14 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
"non-primary reads, an inconsistent state. Please modify the walker");
|
"non-primary reads, an inconsistent state. Please modify the walker");
|
||||||
}
|
}
|
||||||
|
|
||||||
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
ActiveRegionExtension annotation = walker.getClass().getAnnotation(ActiveRegionExtension.class);
|
||||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
this.activeRegionExtension = this.walker.activeRegionExtension == null ? annotation.extension() : this.walker.activeRegionExtension;
|
||||||
|
this.maxRegionSize = this.walker.activeRegionMaxSize == null ? annotation.maxRegion() : this.walker.activeRegionMaxSize;
|
||||||
|
final int bandPassFilterSize = this.walker.bandPassFilterSize == null ? annotation.bandPassFilterSize() : this.walker.bandPassFilterSize;
|
||||||
|
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
|
||||||
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
|
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
|
||||||
|
|
||||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
|
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), bandPassFilterSize, bandPassSigma);
|
||||||
if ( walkerHasPresetRegions ) {
|
if ( walkerHasPresetRegions ) {
|
||||||
// we load all of the preset locations into the
|
// we load all of the preset locations into the
|
||||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile;
|
||||||
|
|
||||||
import java.lang.annotation.Documented;
|
import java.lang.annotation.Documented;
|
||||||
import java.lang.annotation.Inherited;
|
import java.lang.annotation.Inherited;
|
||||||
import java.lang.annotation.Retention;
|
import java.lang.annotation.Retention;
|
||||||
|
|
@ -42,4 +44,18 @@ import java.lang.annotation.RetentionPolicy;
|
||||||
public @interface ActiveRegionExtension {
|
public @interface ActiveRegionExtension {
|
||||||
public int extension() default 0;
|
public int extension() default 0;
|
||||||
public int maxRegion() default 1500;
|
public int maxRegion() default 1500;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The size of the band pass filter in bp. The filter size describes how far
|
||||||
|
* from the current site the band pass extends. So a value of 1 implies a total
|
||||||
|
* band size of 3 bp, the site bp and one on each side
|
||||||
|
* @return the band pass filter size, in bp, extending on each side of the current site
|
||||||
|
*/
|
||||||
|
public int bandPassFilterSize() default BandPassActivityProfile.DEFAULT_FILTER_SIZE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The sigma value for the Gaussian kernel of the band pass filter
|
||||||
|
* @return the sigma of the band pass filter's Gaussian kernel
|
||||||
|
*/
|
||||||
|
public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.*;
|
import org.broadinstitute.sting.gatk.filters.*;
|
||||||
|
|
@ -86,6 +84,22 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
|
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
|
||||||
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
|
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="activeRegionExtension", shortName="activeRegionExtension", doc="The active region extension; if not provided defaults to Walker annotated default", required = false)
|
||||||
|
public Integer activeRegionExtension = null;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="activeRegionMaxSize", shortName="activeRegionMaxSize", doc="The active region maximum size; if not provided defaults to Walker annotated default", required = false)
|
||||||
|
public Integer activeRegionMaxSize = null;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="bandPassFilterSize", shortName="bandPassFilterSize", doc="The filter size of band pass filter; if not provided defaults to Walker annotated default", required = false)
|
||||||
|
public Integer bandPassFilterSize = null;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
||||||
|
public Double bandPassSigma = null;
|
||||||
|
|
||||||
private GenomeLocSortedSet presetActiveRegions = null;
|
private GenomeLocSortedSet presetActiveRegions = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -651,7 +651,7 @@ public class MathUtils {
|
||||||
|
|
||||||
final double sum = sum(array);
|
final double sum = sum(array);
|
||||||
final double[] normalized = new double[array.length];
|
final double[] normalized = new double[array.length];
|
||||||
if ( sum < 0.0 || sum > 1.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
|
if ( sum < 0.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum);
|
||||||
for ( int i = 0; i < array.length; i++ ) {
|
for ( int i = 0; i < array.length; i++ ) {
|
||||||
normalized[i] = array[i] / sum;
|
normalized[i] = array[i] / sum;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,8 +44,10 @@ import java.util.LinkedList;
|
||||||
*/
|
*/
|
||||||
public class BandPassActivityProfile extends ActivityProfile {
|
public class BandPassActivityProfile extends ActivityProfile {
|
||||||
public static final int DEFAULT_FILTER_SIZE = 80;
|
public static final int DEFAULT_FILTER_SIZE = 80;
|
||||||
|
public static final double DEFAULT_SIGMA = 55.0;
|
||||||
|
|
||||||
private final int filterSize;
|
private final int filterSize;
|
||||||
|
private final double sigma;
|
||||||
private final double[] GaussianKernel;
|
private final double[] GaussianKernel;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -53,7 +55,7 @@ public class BandPassActivityProfile extends ActivityProfile {
|
||||||
* @param parser our genome loc parser
|
* @param parser our genome loc parser
|
||||||
*/
|
*/
|
||||||
public BandPassActivityProfile(final GenomeLocParser parser) {
|
public BandPassActivityProfile(final GenomeLocParser parser) {
|
||||||
this(parser, DEFAULT_FILTER_SIZE);
|
this(parser, DEFAULT_FILTER_SIZE, DEFAULT_SIGMA);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -63,16 +65,18 @@ public class BandPassActivityProfile extends ActivityProfile {
|
||||||
* side that are included in the band. So a filter size of 1 implies that the actual band
|
* side that are included in the band. So a filter size of 1 implies that the actual band
|
||||||
* is 3 bp, 1 for the center site and 1 on each side. 2 => 5, etc.
|
* is 3 bp, 1 for the center site and 1 on each side. 2 => 5, etc.
|
||||||
*/
|
*/
|
||||||
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
|
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) {
|
||||||
super(parser);
|
super(parser);
|
||||||
|
|
||||||
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
|
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
|
||||||
|
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
|
||||||
|
|
||||||
// setup the Gaussian kernel for the band pass filter
|
// setup the Gaussian kernel for the band pass filter
|
||||||
this.filterSize = filterSize;
|
this.filterSize = filterSize;
|
||||||
|
this.sigma = sigma;
|
||||||
final double[] kernel = new double[getBandSize()];
|
final double[] kernel = new double[getBandSize()];
|
||||||
for( int iii = 0; iii < 2* filterSize + 1; iii++ ) {
|
for( int iii = 0; iii < 2* filterSize + 1; iii++ ) {
|
||||||
kernel[iii] = MathUtils.NormalDistribution(filterSize, 55.0, iii);
|
kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii);
|
||||||
}
|
}
|
||||||
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
|
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
|
||||||
}
|
}
|
||||||
|
|
@ -108,6 +112,15 @@ public class BandPassActivityProfile extends ActivityProfile {
|
||||||
return filterSize;
|
return filterSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the Gaussian kernel sigma value
|
||||||
|
* @return a non-negative double
|
||||||
|
*/
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public double getSigma() {
|
||||||
|
return sigma;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the kernel of this band pass filter. Do not modify returned result
|
* Get the kernel of this band pass filter. Do not modify returned result
|
||||||
* @return the kernel used in this band pass filter
|
* @return the kernel used in this band pass filter
|
||||||
|
|
|
||||||
|
|
@ -64,11 +64,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
||||||
for ( boolean precedingIsActive : Arrays.asList(true, false) ) {
|
for ( boolean precedingIsActive : Arrays.asList(true, false) ) {
|
||||||
for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) {
|
for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) {
|
||||||
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) {
|
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) {
|
||||||
|
for ( double sigma : Arrays.asList(1.0, 2.0, BandPassActivityProfile.DEFAULT_SIGMA) ) {
|
||||||
// for ( int start : Arrays.asList(10) ) {
|
// for ( int start : Arrays.asList(10) ) {
|
||||||
// for ( boolean precedingIsActive : Arrays.asList(false) ) {
|
// for ( boolean precedingIsActive : Arrays.asList(false) ) {
|
||||||
// for ( int precedingSites: Arrays.asList(0) ) {
|
// for ( int precedingSites: Arrays.asList(0) ) {
|
||||||
// for ( int bandPassSize : Arrays.asList(1) ) {
|
// for ( int bandPassSize : Arrays.asList(1) ) {
|
||||||
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize });
|
tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize, sigma });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -78,10 +80,12 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "BandPassBasicTest")
|
@Test(dataProvider = "BandPassBasicTest")
|
||||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
|
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
|
||||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma);
|
||||||
|
|
||||||
final int expectedBandSize = bandPassSize * 2 + 1;
|
final int expectedBandSize = bandPassSize * 2 + 1;
|
||||||
|
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
|
||||||
|
Assert.assertEquals(profile.getSigma(), sigma, "Wrong sigma");
|
||||||
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
|
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
|
||||||
|
|
||||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||||
|
|
@ -132,7 +136,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
||||||
@Test( dataProvider = "BandPassComposition")
|
@Test( dataProvider = "BandPassComposition")
|
||||||
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
||||||
final int start = 1;
|
final int start = 1;
|
||||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||||
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
||||||
|
|
||||||
// add a buffer so that we can get all of the band pass values
|
// add a buffer so that we can get all of the band pass values
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue