Merge pull request #742 from broadinstitute/rhl_prob_thresh_param_arg
Rhl prob thresh param arg
This commit is contained in:
commit
22a3c6e556
|
|
@ -78,7 +78,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final static String GGA_INTERVALS_FILE = privateTestDir + "haplotype-caller-reduced-test-interval.list";
|
||||
|
||||
private void HCTest(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCaller: args=" + args, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,7 +159,8 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
|
||||
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
|
||||
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), this.walker.maxProbPropagationDistance, this.walker.activeProbThreshold,
|
||||
BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
|
||||
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * SampleUtils.getSAMFileSamples(engine).size();
|
||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
||||
|
|
|
|||
|
|
@ -107,6 +107,17 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
@Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false)
|
||||
public Double bandPassSigma = null;
|
||||
|
||||
/*
|
||||
* For active region limits in ActivityProfile
|
||||
* */
|
||||
@Hidden
|
||||
@Argument(fullName = "maxProbPropagationDistance", shortName = "maxProbPropDist", minValue = 0, doc="Region probability propagation distance beyond it's maximum size.", required = false)
|
||||
public Integer maxProbPropagationDistance = 50;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "activeProbabilityThreshold", shortName = "ActProbThresh", minValue = 0.0, maxValue = 1.0, doc="Threshold for the probability of a profile state being active.", required = false)
|
||||
public Double activeProbThreshold = 0.002;
|
||||
|
||||
private GenomeLocSortedSet presetActiveRegions = null;
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -41,13 +41,13 @@ import java.util.*;
|
|||
* @since Date created
|
||||
*/
|
||||
public class ActivityProfile {
|
||||
private final static int MAX_PROB_PROPAGATION_DISTANCE = 50;
|
||||
protected final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
|
||||
|
||||
protected final List<ActivityProfileState> stateList;
|
||||
protected final GenomeLocParser parser;
|
||||
protected final GenomeLocSortedSet restrictToIntervals;
|
||||
|
||||
protected final int maxProbPropagationDistance;
|
||||
protected final double activeProbThreshold;
|
||||
|
||||
protected GenomeLoc regionStartLoc = null;
|
||||
protected GenomeLoc regionStopLoc = null;
|
||||
|
||||
|
|
@ -60,22 +60,28 @@ public class ActivityProfile {
|
|||
/**
|
||||
* Create a new empty ActivityProfile
|
||||
* @param parser the parser we can use to create genome locs, cannot be null
|
||||
* @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size
|
||||
* @param activeProbThreshold threshold for the probability of am active profile state being active
|
||||
*/
|
||||
public ActivityProfile(final GenomeLocParser parser) {
|
||||
this(parser, null);
|
||||
public ActivityProfile(final GenomeLocParser parser, final int maxProbPropagationDistance, final double activeProbThreshold) {
|
||||
this(parser, maxProbPropagationDistance, activeProbThreshold, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null
|
||||
* @param parser the parser we can use to create genome locs, cannot be null
|
||||
* @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size
|
||||
* @param activeProbThreshold threshold for the probability of a profile state being active
|
||||
* @param intervals only include states that are within these intervals, if not null
|
||||
*/
|
||||
public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) {
|
||||
public ActivityProfile(final GenomeLocParser parser, final int maxProbPropagationDistance, final double activeProbThreshold, final GenomeLocSortedSet intervals) {
|
||||
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
|
||||
|
||||
this.parser = parser;
|
||||
this.stateList = new ArrayList<ActivityProfileState>();
|
||||
this.restrictToIntervals = intervals;
|
||||
this.maxProbPropagationDistance = maxProbPropagationDistance;
|
||||
this.activeProbThreshold = activeProbThreshold;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -98,7 +104,7 @@ public class ActivityProfile {
|
|||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getMaxProbPropagationDistance() {
|
||||
return MAX_PROB_PROPAGATION_DISTANCE;
|
||||
return maxProbPropagationDistance;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -359,7 +365,7 @@ public class ActivityProfile {
|
|||
}
|
||||
|
||||
final ActivityProfileState first = stateList.get(0);
|
||||
final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD;
|
||||
final boolean isActiveRegion = first.isActiveProb > activeProbThreshold;
|
||||
final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion);
|
||||
if ( offsetOfNextRegionEnd == -1 )
|
||||
// couldn't find a valid ending offset, so we return null
|
||||
|
|
@ -453,7 +459,7 @@ public class ActivityProfile {
|
|||
* Find the first index into the state list where the state is considered ! isActiveRegion
|
||||
*
|
||||
* Note that each state has a probability of being active, and this function thresholds that
|
||||
* value on ACTIVE_PROB_THRESHOLD, coloring each state as active or inactive. Finds the
|
||||
* value on activeProbThreshold, coloring each state as active or inactive. Finds the
|
||||
* largest contiguous stretch of states starting at the first state (index 0) with the same isActive
|
||||
* state as isActiveRegion. If the entire state list has the same isActive value, then returns
|
||||
* maxRegionSize
|
||||
|
|
@ -470,7 +476,7 @@ public class ActivityProfile {
|
|||
int endOfActiveRegion = 0;
|
||||
|
||||
while ( endOfActiveRegion < nStates && endOfActiveRegion < maxRegionSize ) {
|
||||
if ( getProb(endOfActiveRegion) > ACTIVE_PROB_THRESHOLD != isActiveRegion ) {
|
||||
if ( getProb(endOfActiveRegion) > activeProbThreshold != isActiveRegion ) {
|
||||
break;
|
||||
}
|
||||
endOfActiveRegion++;
|
||||
|
|
|
|||
|
|
@ -55,19 +55,22 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
/**
|
||||
* Create a new BandPassActivityProfile with default sigma and filter sizes
|
||||
*
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, boolean)
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, int, double, boolean)
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) {
|
||||
this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals,
|
||||
final int maxProbPropagationDistance, final double activeProbThreshold) {
|
||||
this(parser, restrictToIntervals, maxProbPropagationDistance, activeProbThreshold, MAX_FILTER_SIZE, DEFAULT_SIGMA);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, boolean)
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, int, double, boolean)
|
||||
*
|
||||
* sets adaptiveFilterSize to true
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) {
|
||||
this(parser, restrictToIntervals, maxFilterSize, sigma, true);
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals,
|
||||
final int maxProbPropagationDistance, final double activeProbThreshold,
|
||||
final int maxFilterSize, final double sigma) {
|
||||
this(parser, restrictToIntervals, maxProbPropagationDistance, activeProbThreshold, maxFilterSize, sigma, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -75,12 +78,15 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
*
|
||||
* @param parser our genome loc parser
|
||||
* @param restrictToIntervals only include states that are within these intervals, if not null
|
||||
* @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size
|
||||
* @param activeProbThreshold threshold for the probability of a profile state being active
|
||||
* @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
|
||||
* @param sigma the variance of the Gaussian kernel for this band pass filter
|
||||
* @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
|
||||
super(parser, restrictToIntervals);
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxProbPropagationDistance,
|
||||
final double activeProbThreshold, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
|
||||
super(parser, maxProbPropagationDistance, activeProbThreshold, restrictToIntervals);
|
||||
|
||||
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,9 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
private GenomeLocParser genomeLocParser;
|
||||
private GenomeLoc startLoc;
|
||||
|
||||
private final static int MAX_PROB_PROPAGATION_DISTANCE = 50;
|
||||
private final static double ACTIVE_PROB_THRESHOLD= 0.002;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
|
|
@ -86,10 +89,10 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
public ActivityProfile makeProfile() {
|
||||
switch ( type ) {
|
||||
case Base: return new ActivityProfile(genomeLocParser);
|
||||
case Base: return new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
case BandPass:
|
||||
// zero size => equivalent to ActivityProfile
|
||||
return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false);
|
||||
return new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, 0, 0.01, false);
|
||||
default: throw new IllegalStateException(type.toString());
|
||||
}
|
||||
}
|
||||
|
|
@ -230,7 +233,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = !DEBUG, dataProvider = "RegionCreationTests")
|
||||
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
Assert.assertNotNull(profile.toString());
|
||||
|
||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||
|
|
@ -316,7 +319,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = ! DEBUG, dataProvider = "SoftClipsTest")
|
||||
public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) {
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
|
||||
final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength();
|
||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||
|
|
@ -450,7 +453,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
private double[] makeGaussian(final int mean, final int range, final double sigma) {
|
||||
final double[] gauss = new double[range];
|
||||
for( int iii = 0; iii < range; iii++ ) {
|
||||
gauss[iii] = MathUtils.normalDistribution(mean, sigma, iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD;
|
||||
gauss[iii] = MathUtils.normalDistribution(mean, sigma, iii) + ACTIVE_PROB_THRESHOLD;
|
||||
}
|
||||
return gauss;
|
||||
}
|
||||
|
|
@ -469,7 +472,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "ActiveRegionCutTests")
|
||||
public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List<Double> probs) {
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||
final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
|
||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||
for ( int i = 0; i <= maxRegionSize + profile.getMaxProbPropagationDistance(); i++ ) {
|
||||
|
|
|
|||
|
|
@ -59,6 +59,9 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
private final static boolean DEBUG = false;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
private final static int MAX_PROB_PROPAGATION_DISTANCE = 50;
|
||||
private final static double ACTIVE_PROB_THRESHOLD= 0.002;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
|
|
@ -91,7 +94,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest")
|
||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, bandPassSize, sigma, false);
|
||||
|
||||
final int expectedBandSize = bandPassSize * 2 + 1;
|
||||
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
|
||||
|
|
@ -155,7 +158,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
@Test( enabled = ! DEBUG, dataProvider = "BandPassComposition")
|
||||
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
||||
final int start = 1;
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE,
|
||||
ACTIVE_PROB_THRESHOLD, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
||||
|
||||
// add a buffer so that we can get all of the band pass values
|
||||
|
|
@ -228,7 +232,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test( enabled = ! DEBUG, dataProvider = "KernelCreation")
|
||||
public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD,
|
||||
maxSize, sigma, true);
|
||||
|
||||
final double[] kernel = profile.getKernel();
|
||||
Assert.assertEquals(kernel.length, expectedKernel.length);
|
||||
|
|
@ -268,8 +273,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
final Pair<VCFHeader, GATKVCFUtils.VCIterable<LineIterator>> reader = GATKVCFUtils.readAllVCs(file, codec);
|
||||
|
||||
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
|
||||
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null);
|
||||
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null);
|
||||
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD);
|
||||
int pos = start;
|
||||
for ( final VariantContext vc : reader.getSecond() ) {
|
||||
if ( vc == null ) continue;
|
||||
|
|
@ -324,10 +329,10 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
lastPosSeen = region.getLocation().getStop();
|
||||
|
||||
for ( final ActivityProfileState state : region.getSupportingStates() ) {
|
||||
Assert.assertEquals(state.isActiveProb > ActivityProfile.ACTIVE_PROB_THRESHOLD, region.isActive(),
|
||||
Assert.assertEquals(state.isActiveProb > ACTIVE_PROB_THRESHOLD, region.isActive(),
|
||||
"Region is active=" + region.isActive() + " but contains a state " + state + " with prob "
|
||||
+ state.isActiveProb + " not within expected values given threshold for activity of "
|
||||
+ ActivityProfile.ACTIVE_PROB_THRESHOLD);
|
||||
+ ACTIVE_PROB_THRESHOLD);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue