From 8e8126506b81fccf34a06aec8dcad873f2f28e09 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 09:44:46 -0500 Subject: [PATCH] Renaming IncrementalActivityProfile to ActivityProfile -- Also adding a work in progress functionality to make it easy to visualize activity profiles and active regions in IGV --- .../traversals/TraverseActiveRegions.java | 12 +++++++++--- .../gatk/walkers/ActiveRegionWalker.java | 2 ++ .../activeregion/ActiveRegionReadState.java | 5 ++--- ...ivityProfile.java => ActivityProfile.java} | 10 +++++----- .../activeregion/ActivityProfileState.java | 3 ++- ...file.java => BandPassActivityProfile.java} | 19 ++++++++++++++++--- ...Test.java => ActivityProfileUnitTest.java} | 18 +++++++++--------- ...a => BandPassActivityProfileUnitTest.java} | 11 +++++------ 8 files changed, 50 insertions(+), 30 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/activeregion/{IncrementalActivityProfile.java => ActivityProfile.java} (98%) rename public/java/src/org/broadinstitute/sting/utils/activeregion/{BandPassIncrementalActivityProfile.java => BandPassActivityProfile.java} (87%) rename public/java/test/org/broadinstitute/sting/utils/activeregion/{IncrementalActivityProfileUnitTest.java => ActivityProfileUnitTest.java} (95%) rename public/java/test/org/broadinstitute/sting/utils/activeregion/{BandPassIncrementalActivityProfileUnitTest.java => BandPassActivityProfileUnitTest.java} (93%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 436edbdf1..071b4d806 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -77,7 +77,7 @@ public class TraverseActiveRegions extends TraversalEngine myReads = new LinkedList(); private GenomeLoc spanOfLastReadSeen = null; - 
private IncrementalActivityProfile activityProfile = null; + private ActivityProfile activityProfile = null; int maxReadsInMemory = 0; @Override @@ -94,7 +94,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); workQueue.addAll(activeRegions); - if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); } if ( walker.activeRegionOutStream != null ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 85d7c8293..e268bba0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -61,6 +61,8 @@ import java.util.*; @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { + @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false) + public PrintStream activityProfileOutStream = null; @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) public PrintStream activeRegionOutStream = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java 
b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java index d9b458f51..5da88cb6d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.utils.activeregion; /** - * Created with IntelliJ IDEA. + * Describes how a read relates to an assigned ActiveRegion + * * User: thibault * Date: 11/26/12 * Time: 2:35 PM - * - * Describes how a read relates to an assigned ActiveRegion */ public enum ActiveRegionReadState { PRIMARY, // This is the read's primary region diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java rename to public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 1292b3176..a863d695e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -39,7 +39,7 @@ import java.util.*; * @author Mark DePristo * @since Date created */ -public class IncrementalActivityProfile { +public class ActivityProfile { private final static int MAX_PROB_PROPOGATION_DISTANCE = 10; private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author @@ -50,10 +50,10 @@ public class IncrementalActivityProfile { protected GenomeLoc regionStopLoc = null; /** - * Create a new empty IncrementalActivityProfile + * Create a new empty ActivityProfile * @param parser the parser we can use to create genome locs, cannot be null */ - public IncrementalActivityProfile(final GenomeLocParser parser) { + public 
ActivityProfile(final GenomeLocParser parser) { if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); this.parser = parser; @@ -79,7 +79,7 @@ public class IncrementalActivityProfile { * @return a positive integer distance in bp */ @Ensures("result >= 0") - public int getMaxProbPropogationDistance() { + public int getMaxProbPropagationDistance() { return MAX_PROB_PROPOGATION_DISTANCE; } @@ -377,6 +377,6 @@ public class IncrementalActivityProfile { } // we're one past the end, so i must be decremented - return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? i - 1 : -1; + return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index df21672a9..272596be3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -30,7 +30,8 @@ import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.GenomeLoc; /** - * Created with IntelliJ IDEA. 
+ * The state of an active region walker's isActive call at a specific locus in the genome + * * User: rpoplin * Date: 7/27/12 */ diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java similarity index 87% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java rename to public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java index 805a0b60a..1a8bac086 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -42,7 +42,7 @@ import java.util.LinkedList; * @author Mark DePristo * @since 2011 */ -public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile { +public class BandPassActivityProfile extends ActivityProfile { public static final int DEFAULT_FILTER_SIZE = 80; private final int filterSize; @@ -52,7 +52,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi * Create a band pass activity profile with the default band size * @param parser our genome loc parser */ - public BandPassIncrementalActivityProfile(final GenomeLocParser parser) { + public BandPassActivityProfile(final GenomeLocParser parser) { this(parser, DEFAULT_FILTER_SIZE); } @@ -63,7 +63,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi * side that are included in the band. So a filter size of 1 implies that the actual band * is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc. 
*/ - public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) { + public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) { super(parser); if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); @@ -77,6 +77,19 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); } + /** + * Our maximum propagation distance is whatever our parent's is, plus our filter size + * + * Stops the profile from interpreting sites that aren't yet fully determined due to + * propagation of the probabilities. + * + * @return the distance in bp we might move our probabilities around for some site i + */ + @Override + public int getMaxProbPropagationDistance() { + return super.getMaxProbPropagationDistance() + filterSize; + } + /** * Get the size (in bp) of the band pass filter * @return a positive integer diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 64065029c..7cfc5ebb7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -45,7 +45,7 @@ import java.io.FileNotFoundException; import java.util.*; -public class IncrementalActivityProfileUnitTest extends BaseTest { +public class ActivityProfileUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; private GenomeLoc startLoc; @@ -82,12 +82,12 @@ public
class IncrementalActivityProfileUnitTest extends BaseTest { return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); } - public IncrementalActivityProfile makeProfile() { + public ActivityProfile makeProfile() { switch ( type ) { - case Base: return new IncrementalActivityProfile(genomeLocParser); + case Base: return new ActivityProfile(genomeLocParser); case BandPass: - // zero size => equivalent to IncrementalActivityProfile - return new BandPassIncrementalActivityProfile(genomeLocParser, 0); + // zero size => equivalent to ActivityProfile + return new BandPassActivityProfile(genomeLocParser, 0); default: throw new IllegalStateException(type.toString()); } } @@ -125,7 +125,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - IncrementalActivityProfile profile = cfg.makeProfile(); + ActivityProfile profile = cfg.makeProfile(); Assert.assertTrue(profile.isEmpty()); @@ -228,7 +228,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "RegionCreationTests") public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { - final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser); Assert.assertNotNull(profile.toString()); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); @@ -253,7 +253,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { } for ( int i = 0; i < probs.size(); i++ ) { - if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size())) + if ( forceConversion || (i + maxRegionSize + 
profile.getMaxProbPropagationDistance() < probs.size())) // only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end Assert.assertTrue(seenSites.get(i), "Missed site " + i); } @@ -314,7 +314,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "SoftClipsTest") public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { - final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser); final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java similarity index 93% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index be90353b3..a2a85f1d0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -47,7 +46,7 @@ import java.io.FileNotFoundException; import 
java.util.*; -public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { +public class BandPassActivityProfileUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; @BeforeClass @@ -80,7 +79,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) { - final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size"); @@ -103,7 +102,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { } } - private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) { + private double[] bandPassInOnePass(final BandPassActivityProfile profile, final double[] activeProbArray) { final double[] bandPassProbArray = new double[activeProbArray.length]; // apply the band pass filter for activeProbArray into filteredProbArray @@ -121,7 +120,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { public Object[][] makeBandPassComposition() { final List tests = new LinkedList(); - for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { tests.add(new Object[]{ bandPassSize, integrationLength }); } @@ -133,7 +132,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { @Test( dataProvider = "BandPassComposition") public void 
testBandPassComposition(final int bandPassSize, final int integrationLength) { final int start = 1; - final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize); final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; // add a buffer so that we can get all of the band pass values