diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 2853fce29..11576911f 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -78,7 +78,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String GGA_INTERVALS_FILE = privateTestDir + "haplotype-caller-reduced-test-interval.list"; private void HCTest(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; + final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering --pcr_indel_model NONE --maxReadsInRegionPerSample 1000 --minReadsPerAlignmentStart 5 --maxProbPropagationDistance 50 --activeProbabilityThreshold 0.002 -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCaller: args=" + args, spec); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegions.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegions.java index 3a8c3ffab..7d93311f2 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegions.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/engine/traversals/TraverseActiveRegions.java @@ -159,7 +159,8 @@ public final class TraverseActiveRegions extends TraversalEngine extends Walker stateList; protected final GenomeLocParser parser; protected final GenomeLocSortedSet restrictToIntervals; + protected final int maxProbPropagationDistance; + protected final double activeProbThreshold; + protected GenomeLoc regionStartLoc = null; protected GenomeLoc regionStopLoc = null; @@ -60,22 +60,28 @@ public class ActivityProfile { /** * Create a new empty ActivityProfile * @param parser the parser we can use to create genome locs, cannot be null + * @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size + * @param activeProbThreshold threshold for the probability of am active profile state being active */ - public ActivityProfile(final GenomeLocParser parser) { - this(parser, null); + public ActivityProfile(final GenomeLocParser parser, final int maxProbPropagationDistance, final double activeProbThreshold) { + this(parser, maxProbPropagationDistance, activeProbThreshold, null); } /** * Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null * @param parser the parser we can use to create genome locs, cannot be null + * @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size + * @param activeProbThreshold threshold for the probability of a profile state being active * @param intervals only include states that are within these intervals, if not null */ - public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) { + public ActivityProfile(final GenomeLocParser parser, final int maxProbPropagationDistance, final double activeProbThreshold, final GenomeLocSortedSet intervals) { if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); this.parser = parser; this.stateList = new ArrayList(); this.restrictToIntervals = intervals; + this.maxProbPropagationDistance = maxProbPropagationDistance; + this.activeProbThreshold = activeProbThreshold; } @Override @@ -98,7 +104,7 @@ public class ActivityProfile { */ @Ensures("result >= 0") public int getMaxProbPropagationDistance() { - return MAX_PROB_PROPAGATION_DISTANCE; + return maxProbPropagationDistance; } /** @@ -359,7 +365,7 @@ public class ActivityProfile { } final ActivityProfileState first = stateList.get(0); - final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD; + final boolean isActiveRegion = first.isActiveProb > activeProbThreshold; final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion); if ( offsetOfNextRegionEnd == -1 ) // couldn't find a valid ending offset, so we return null @@ -453,7 +459,7 @@ public class ActivityProfile { * Find the first index into the state list where the state is considered ! isActiveRegion * * Note that each state has a probability of being active, and this function thresholds that - * value on ACTIVE_PROB_THRESHOLD, coloring each state as active or inactive. Finds the + * value on activeProbThreshold, coloring each state as active or inactive. Finds the * largest contiguous stretch of states starting at the first state (index 0) with the same isActive * state as isActiveRegion. If the entire state list has the same isActive value, then returns * maxRegionSize @@ -470,7 +476,7 @@ public class ActivityProfile { int endOfActiveRegion = 0; while ( endOfActiveRegion < nStates && endOfActiveRegion < maxRegionSize ) { - if ( getProb(endOfActiveRegion) > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { + if ( getProb(endOfActiveRegion) > activeProbThreshold != isActiveRegion ) { break; } endOfActiveRegion++; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfile.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfile.java index b79050c87..52437a846 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfile.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfile.java @@ -55,19 +55,22 @@ public class BandPassActivityProfile extends ActivityProfile { /** * Create a new BandPassActivityProfile with default sigma and filter sizes * - * @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, boolean) + * @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, int, double, boolean) */ - public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) { - this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true); + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, + final int maxProbPropagationDistance, final double activeProbThreshold) { + this(parser, restrictToIntervals, maxProbPropagationDistance, activeProbThreshold, MAX_FILTER_SIZE, DEFAULT_SIGMA); } /** - * @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, boolean) + * @see #BandPassActivityProfile(org.broadinstitute.gatk.utils.GenomeLocParser, org.broadinstitute.gatk.utils.GenomeLocSortedSet, int, double, int, double, boolean) * * sets adaptiveFilterSize to true */ - public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) { - this(parser, restrictToIntervals, maxFilterSize, sigma, true); + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, + final int maxProbPropagationDistance, final double activeProbThreshold, + final int maxFilterSize, final double sigma) { + this(parser, restrictToIntervals, maxProbPropagationDistance, activeProbThreshold, maxFilterSize, sigma, true); } /** @@ -75,12 +78,15 @@ public class BandPassActivityProfile extends ActivityProfile { * * @param parser our genome loc parser * @param restrictToIntervals only include states that are within these intervals, if not null + * @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size + * @param activeProbThreshold threshold for the probability of a profile state being active * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma * @param sigma the variance of the Gaussian kernel for this band pass filter * @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size */ - public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { - super(parser, restrictToIntervals); + public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxProbPropagationDistance, + final double activeProbThreshold, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { + super(parser, maxProbPropagationDistance, activeProbThreshold, restrictToIntervals); if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/ActivityProfileUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/ActivityProfileUnitTest.java index d450312e8..b3442b331 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/ActivityProfileUnitTest.java @@ -51,6 +51,9 @@ public class ActivityProfileUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; private GenomeLoc startLoc; + private final static int MAX_PROB_PROPAGATION_DISTANCE = 50; + private final static double ACTIVE_PROB_THRESHOLD= 0.002; + @BeforeClass public void init() throws FileNotFoundException { // sequence @@ -86,10 +89,10 @@ public class ActivityProfileUnitTest extends BaseTest { public ActivityProfile makeProfile() { switch ( type ) { - case Base: return new ActivityProfile(genomeLocParser); + case Base: return new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); case BandPass: // zero size => equivalent to ActivityProfile - return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false); + return new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, 0, 0.01, false); default: throw new IllegalStateException(type.toString()); } } @@ -230,7 +233,7 @@ public class ActivityProfileUnitTest extends BaseTest { @Test(enabled = !DEBUG, dataProvider = "RegionCreationTests") public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { - final ActivityProfile profile = new ActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); Assert.assertNotNull(profile.toString()); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); @@ -316,7 +319,7 @@ public class ActivityProfileUnitTest extends BaseTest { @Test(enabled = ! DEBUG, dataProvider = "SoftClipsTest") public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { - final ActivityProfile profile = new ActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); @@ -450,7 +453,7 @@ public class ActivityProfileUnitTest extends BaseTest { private double[] makeGaussian(final int mean, final int range, final double sigma) { final double[] gauss = new double[range]; for( int iii = 0; iii < range; iii++ ) { - gauss[iii] = MathUtils.normalDistribution(mean, sigma, iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD; + gauss[iii] = MathUtils.normalDistribution(mean, sigma, iii) + ACTIVE_PROB_THRESHOLD; } return gauss; } @@ -469,7 +472,7 @@ public class ActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "ActiveRegionCutTests") public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List probs) { - final ActivityProfile profile = new ActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); for ( int i = 0; i <= maxRegionSize + profile.getMaxProbPropagationDistance(); i++ ) { diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfileUnitTest.java index abf599a50..2087d9a0c 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -59,6 +59,9 @@ public class BandPassActivityProfileUnitTest extends BaseTest { private final static boolean DEBUG = false; private GenomeLocParser genomeLocParser; + private final static int MAX_PROB_PROPAGATION_DISTANCE = 50; + private final static double ACTIVE_PROB_THRESHOLD= 0.002; + @BeforeClass public void init() throws FileNotFoundException { // sequence @@ -91,7 +94,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, bandPassSize, sigma, false); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size"); @@ -155,7 +158,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test( enabled = ! DEBUG, dataProvider = "BandPassComposition") public void testBandPassComposition(final int bandPassSize, final int integrationLength) { final int start = 1; - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, + ACTIVE_PROB_THRESHOLD, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA); final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; // add a buffer so that we can get all of the band pass values @@ -228,7 +232,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test( enabled = ! DEBUG, dataProvider = "KernelCreation") public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD, + maxSize, sigma, true); final double[] kernel = profile.getKernel(); Assert.assertEquals(kernel.length, expectedKernel.length); @@ -268,8 +273,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest { final Pair> reader = GATKVCFUtils.readAllVCs(file, codec); final List incRegions = new ArrayList(); - final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null); - final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null); + final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); + final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null, MAX_PROB_PROPAGATION_DISTANCE, ACTIVE_PROB_THRESHOLD); int pos = start; for ( final VariantContext vc : reader.getSecond() ) { if ( vc == null ) continue; @@ -324,10 +329,10 @@ public class BandPassActivityProfileUnitTest extends BaseTest { lastPosSeen = region.getLocation().getStop(); for ( final ActivityProfileState state : region.getSupportingStates() ) { - Assert.assertEquals(state.isActiveProb > ActivityProfile.ACTIVE_PROB_THRESHOLD, region.isActive(), + Assert.assertEquals(state.isActiveProb > ACTIVE_PROB_THRESHOLD, region.isActive(), "Region is active=" + region.isActive() + " but contains a state " + state + " with prob " + state.isActiveProb + " not within expected values given threshold for activity of " - + ActivityProfile.ACTIVE_PROB_THRESHOLD); + + ACTIVE_PROB_THRESHOLD); } } }