Renaming IncrementalActivityProfile to ActivityProfile

-- Also adding a work in progress functionality to make it easy to visualize activity profiles and active regions in IGV
This commit is contained in:
Mark DePristo 2013-01-23 09:44:46 -05:00
parent e917f56df8
commit 8e8126506b
8 changed files with 50 additions and 30 deletions

View File

@ -77,7 +77,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>(); private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
private GenomeLoc spanOfLastReadSeen = null; private GenomeLoc spanOfLastReadSeen = null;
private IncrementalActivityProfile activityProfile = null; private ActivityProfile activityProfile = null;
int maxReadsInMemory = 0; int maxReadsInMemory = 0;
@Override @Override
@ -94,7 +94,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
walkerHasPresetRegions = arWalker.hasPresetActiveRegions(); walkerHasPresetRegions = arWalker.hasPresetActiveRegions();
activityProfile = new IncrementalActivityProfile(engine.getGenomeLocParser()); activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
if ( walkerHasPresetRegions ) { if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the // we load all of the preset locations into the
for ( final GenomeLoc loc : arWalker.getPresetActiveRegions()) { for ( final GenomeLoc loc : arWalker.getPresetActiveRegions()) {
@ -349,6 +349,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
if ( ! walkerHasPresetRegions ) { if ( ! walkerHasPresetRegions ) {
activityProfile.add(state); activityProfile.add(state);
} }
if ( walker.activityProfileOutStream != null )
walker.activityProfileOutStream.printf("%s\t%d\t%d\t%.2f%n",
locus.getLocation().getContig(), locus.getLocation().getStart(), locus.getLocation().getStart(),
state.isActiveProb);
} }
/** /**
@ -375,7 +381,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
// We don't have preset regions, so we get our regions from the activity profile // We don't have preset regions, so we get our regions from the activity profile
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile);
workQueue.addAll(activeRegions); workQueue.addAll(activeRegions);
if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." );
} }
if ( walker.activeRegionOutStream != null ) { if ( walker.activeRegionOutStream != null ) {

View File

@ -61,6 +61,8 @@ import java.util.*;
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords @RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false)
public PrintStream activityProfileOutStream = null;
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
public PrintStream activeRegionOutStream = null; public PrintStream activeRegionOutStream = null;

View File

@ -26,12 +26,11 @@
package org.broadinstitute.sting.utils.activeregion; package org.broadinstitute.sting.utils.activeregion;
/** /**
* Created with IntelliJ IDEA. * Describes how a read relates to an assigned ActiveRegion
*
* User: thibault * User: thibault
* Date: 11/26/12 * Date: 11/26/12
* Time: 2:35 PM * Time: 2:35 PM
*
* Describes how a read relates to an assigned ActiveRegion
*/ */
public enum ActiveRegionReadState { public enum ActiveRegionReadState {
PRIMARY, // This is the read's primary region PRIMARY, // This is the read's primary region

View File

@ -39,7 +39,7 @@ import java.util.*;
* @author Mark DePristo * @author Mark DePristo
* @since Date created * @since Date created
*/ */
public class IncrementalActivityProfile { public class ActivityProfile {
private final static int MAX_PROB_PROPOGATION_DISTANCE = 10; private final static int MAX_PROB_PROPOGATION_DISTANCE = 10;
private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
@ -50,10 +50,10 @@ public class IncrementalActivityProfile {
protected GenomeLoc regionStopLoc = null; protected GenomeLoc regionStopLoc = null;
/** /**
* Create a new empty IncrementalActivityProfile * Create a new empty ActivityProfile
* @param parser the parser we can use to create genome locs, cannot be null * @param parser the parser we can use to create genome locs, cannot be null
*/ */
public IncrementalActivityProfile(final GenomeLocParser parser) { public ActivityProfile(final GenomeLocParser parser) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.parser = parser; this.parser = parser;
@ -79,7 +79,7 @@ public class IncrementalActivityProfile {
* @return a positive integer distance in bp * @return a positive integer distance in bp
*/ */
@Ensures("result >= 0") @Ensures("result >= 0")
public int getMaxProbPropogationDistance() { public int getMaxProbPropagationDistance() {
return MAX_PROB_PROPOGATION_DISTANCE; return MAX_PROB_PROPOGATION_DISTANCE;
} }
@ -377,6 +377,6 @@ public class IncrementalActivityProfile {
} }
// we're one past the end, so i must be decremented // we're one past the end, so i must be decremented
return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? i - 1 : -1; return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1;
} }
} }

View File

@ -30,7 +30,8 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
/** /**
* Created with IntelliJ IDEA. * The state of an active region walker's isActive call at a specific locus in the genome
*
* User: rpoplin * User: rpoplin
* Date: 7/27/12 * Date: 7/27/12
*/ */

View File

@ -42,7 +42,7 @@ import java.util.LinkedList;
* @author Mark DePristo * @author Mark DePristo
* @since 2011 * @since 2011
*/ */
public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile { public class BandPassActivityProfile extends ActivityProfile {
public static final int DEFAULT_FILTER_SIZE = 80; public static final int DEFAULT_FILTER_SIZE = 80;
private final int filterSize; private final int filterSize;
@ -52,7 +52,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
* Create a band pass activity profile with the default band size * Create a band pass activity profile with the default band size
* @param parser our genome loc parser * @param parser our genome loc parser
*/ */
public BandPassIncrementalActivityProfile(final GenomeLocParser parser) { public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, DEFAULT_FILTER_SIZE); this(parser, DEFAULT_FILTER_SIZE);
} }
@ -63,7 +63,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
* side that are included in the band. So a filter size of 1 implies that the actual band * side that are included in the band. So a filter size of 1 implies that the actual band
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc. * is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
*/ */
public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) { public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
super(parser); super(parser);
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
@ -77,6 +77,19 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
} }
/**
* Our maximize propagation distance is whatever our parent's is, plus our filter size
*
* Stops the profile from interpreting sites that aren't yet fully determined due to
* propagation of the probabilities.
*
* @return the distance in bp we might move our probabilities around for some site i
*/
@Override
public int getMaxProbPropagationDistance() {
return super.getMaxProbPropagationDistance() + filterSize;
}
/** /**
* Get the size (in bp) of the band pass filter * Get the size (in bp) of the band pass filter
* @return a positive integer * @return a positive integer

View File

@ -45,7 +45,7 @@ import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class IncrementalActivityProfileUnitTest extends BaseTest { public class ActivityProfileUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
private GenomeLoc startLoc; private GenomeLoc startLoc;
@ -82,12 +82,12 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions));
} }
public IncrementalActivityProfile makeProfile() { public ActivityProfile makeProfile() {
switch ( type ) { switch ( type ) {
case Base: return new IncrementalActivityProfile(genomeLocParser); case Base: return new ActivityProfile(genomeLocParser);
case BandPass: case BandPass:
// zero size => equivalent to IncrementalActivityProfile // zero size => equivalent to ActivityProfile
return new BandPassIncrementalActivityProfile(genomeLocParser, 0); return new BandPassActivityProfile(genomeLocParser, 0);
default: throw new IllegalStateException(type.toString()); default: throw new IllegalStateException(type.toString());
} }
} }
@ -125,7 +125,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "BasicActivityProfileTestProvider") @Test(dataProvider = "BasicActivityProfileTestProvider")
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
IncrementalActivityProfile profile = cfg.makeProfile(); ActivityProfile profile = cfg.makeProfile();
Assert.assertTrue(profile.isEmpty()); Assert.assertTrue(profile.isEmpty());
@ -228,7 +228,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "RegionCreationTests") @Test(enabled = true, dataProvider = "RegionCreationTests")
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); final ActivityProfile profile = new ActivityProfile(genomeLocParser);
Assert.assertNotNull(profile.toString()); Assert.assertNotNull(profile.toString());
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
@ -253,7 +253,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
} }
for ( int i = 0; i < probs.size(); i++ ) { for ( int i = 0; i < probs.size(); i++ ) {
if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size())) if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size()))
// only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end // only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
Assert.assertTrue(seenSites.get(i), "Missed site " + i); Assert.assertTrue(seenSites.get(i), "Missed site " + i);
} }
@ -314,7 +314,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "SoftClipsTest") @Test(dataProvider = "SoftClipsTest")
public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) {
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); final ActivityProfile profile = new ActivityProfile(genomeLocParser);
final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength();
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
@ -47,7 +46,7 @@ import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { public class BandPassActivityProfileUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
@BeforeClass @BeforeClass
@ -80,7 +79,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "BandPassBasicTest") @Test(dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) { public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
final int expectedBandSize = bandPassSize * 2 + 1; final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size"); Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
@ -103,7 +102,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
} }
} }
private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) { private double[] bandPassInOnePass(final BandPassActivityProfile profile, final double[] activeProbArray) {
final double[] bandPassProbArray = new double[activeProbArray.length]; final double[] bandPassProbArray = new double[activeProbArray.length];
// apply the band pass filter for activeProbArray into filteredProbArray // apply the band pass filter for activeProbArray into filteredProbArray
@ -121,7 +120,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
public Object[][] makeBandPassComposition() { public Object[][] makeBandPassComposition() {
final List<Object[]> tests = new LinkedList<Object[]>(); final List<Object[]> tests = new LinkedList<Object[]>();
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) { for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) {
for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) {
tests.add(new Object[]{ bandPassSize, integrationLength }); tests.add(new Object[]{ bandPassSize, integrationLength });
} }
@ -133,7 +132,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
@Test( dataProvider = "BandPassComposition") @Test( dataProvider = "BandPassComposition")
public void testBandPassComposition(final int bandPassSize, final int integrationLength) { public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
final int start = 1; final int start = 1;
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
// add a buffer so that we can get all of the band pass values // add a buffer so that we can get all of the band pass values