Renaming IncrementalActivityProfile to ActivityProfile

-- Also adding a work in progress functionality to make it easy to visualize activity profiles and active regions in IGV
This commit is contained in:
Mark DePristo 2013-01-23 09:44:46 -05:00
parent e917f56df8
commit 8e8126506b
8 changed files with 50 additions and 30 deletions

View File

@ -77,7 +77,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
private GenomeLoc spanOfLastReadSeen = null;
private IncrementalActivityProfile activityProfile = null;
private ActivityProfile activityProfile = null;
int maxReadsInMemory = 0;
@Override
@ -94,7 +94,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
walkerHasPresetRegions = arWalker.hasPresetActiveRegions();
activityProfile = new IncrementalActivityProfile(engine.getGenomeLocParser());
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the
for ( final GenomeLoc loc : arWalker.getPresetActiveRegions()) {
@ -349,6 +349,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
if ( ! walkerHasPresetRegions ) {
activityProfile.add(state);
}
if ( walker.activityProfileOutStream != null )
walker.activityProfileOutStream.printf("%s\t%d\t%d\t%.2f%n",
locus.getLocation().getContig(), locus.getLocation().getStart(), locus.getLocation().getStart(),
state.isActiveProb);
}
/**
@ -375,7 +381,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
// We don't have preset regions, so we get our regions from the activity profile
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile);
workQueue.addAll(activeRegions);
if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." );
if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." );
}
if ( walker.activeRegionOutStream != null ) {

View File

@ -61,6 +61,8 @@ import java.util.*;
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false)
public PrintStream activityProfileOutStream = null;
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
public PrintStream activeRegionOutStream = null;

View File

@ -26,12 +26,11 @@
package org.broadinstitute.sting.utils.activeregion;
/**
* Created with IntelliJ IDEA.
* Describes how a read relates to an assigned ActiveRegion
*
* User: thibault
* Date: 11/26/12
* Time: 2:35 PM
*
* Describes how a read relates to an assigned ActiveRegion
*/
public enum ActiveRegionReadState {
PRIMARY, // This is the read's primary region

View File

@ -39,7 +39,7 @@ import java.util.*;
* @author Mark DePristo
* @since Date created
*/
public class IncrementalActivityProfile {
public class ActivityProfile {
private final static int MAX_PROB_PROPOGATION_DISTANCE = 10;
private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
@ -50,10 +50,10 @@ public class IncrementalActivityProfile {
protected GenomeLoc regionStopLoc = null;
/**
* Create a new empty IncrementalActivityProfile
* Create a new empty ActivityProfile
* @param parser the parser we can use to create genome locs, cannot be null
*/
public IncrementalActivityProfile(final GenomeLocParser parser) {
public ActivityProfile(final GenomeLocParser parser) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.parser = parser;
@ -79,7 +79,7 @@ public class IncrementalActivityProfile {
* @return a positive integer distance in bp
*/
@Ensures("result >= 0")
public int getMaxProbPropogationDistance() {
public int getMaxProbPropagationDistance() {
return MAX_PROB_PROPOGATION_DISTANCE;
}
@ -377,6 +377,6 @@ public class IncrementalActivityProfile {
}
// we're one past the end, so i must be decremented
return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? i - 1 : -1;
return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1;
}
}

View File

@ -30,7 +30,8 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Created with IntelliJ IDEA.
* The state of an active region walker's isActive call at a specific locus in the genome
*
* User: rpoplin
* Date: 7/27/12
*/

View File

@ -42,7 +42,7 @@ import java.util.LinkedList;
* @author Mark DePristo
* @since 2011
*/
public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile {
public class BandPassActivityProfile extends ActivityProfile {
public static final int DEFAULT_FILTER_SIZE = 80;
private final int filterSize;
@ -52,7 +52,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
* Create a band pass activity profile with the default band size
* @param parser our genome loc parser
*/
public BandPassIncrementalActivityProfile(final GenomeLocParser parser) {
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, DEFAULT_FILTER_SIZE);
}
@ -63,7 +63,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
* side that are included in the band. So a filter size of 1 implies that the actual band
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
*/
public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) {
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
super(parser);
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
@ -77,6 +77,19 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
}
/**
* Our maximize propagation distance is whatever our parent's is, plus our filter size
*
* Stops the profile from interpreting sites that aren't yet fully determined due to
* propagation of the probabilities.
*
* @return the distance in bp we might move our probabilities around for some site i
*/
@Override
public int getMaxProbPropagationDistance() {
return super.getMaxProbPropagationDistance() + filterSize;
}
/**
* Get the size (in bp) of the band pass filter
* @return a positive integer

View File

@ -45,7 +45,7 @@ import java.io.FileNotFoundException;
import java.util.*;
public class IncrementalActivityProfileUnitTest extends BaseTest {
public class ActivityProfileUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private GenomeLoc startLoc;
@ -82,12 +82,12 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions));
}
public IncrementalActivityProfile makeProfile() {
public ActivityProfile makeProfile() {
switch ( type ) {
case Base: return new IncrementalActivityProfile(genomeLocParser);
case Base: return new ActivityProfile(genomeLocParser);
case BandPass:
// zero size => equivalent to IncrementalActivityProfile
return new BandPassIncrementalActivityProfile(genomeLocParser, 0);
// zero size => equivalent to ActivityProfile
return new BandPassActivityProfile(genomeLocParser, 0);
default: throw new IllegalStateException(type.toString());
}
}
@ -125,7 +125,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "BasicActivityProfileTestProvider")
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
IncrementalActivityProfile profile = cfg.makeProfile();
ActivityProfile profile = cfg.makeProfile();
Assert.assertTrue(profile.isEmpty());
@ -228,7 +228,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "RegionCreationTests")
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser);
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
Assert.assertNotNull(profile.toString());
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
@ -253,7 +253,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
}
for ( int i = 0; i < probs.size(); i++ ) {
if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size()))
if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size()))
// only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
Assert.assertTrue(seenSites.get(i), "Missed site " + i);
}
@ -314,7 +314,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "SoftClipsTest")
public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) {
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser);
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength();
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -47,7 +46,7 @@ import java.io.FileNotFoundException;
import java.util.*;
public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
public class BandPassActivityProfileUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
@BeforeClass
@ -80,7 +79,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
@Test(dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
@ -103,7 +102,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
}
}
private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) {
private double[] bandPassInOnePass(final BandPassActivityProfile profile, final double[] activeProbArray) {
final double[] bandPassProbArray = new double[activeProbArray.length];
// apply the band pass filter for activeProbArray into filteredProbArray
@ -121,7 +120,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
public Object[][] makeBandPassComposition() {
final List<Object[]> tests = new LinkedList<Object[]>();
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) {
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) {
for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) {
tests.add(new Object[]{ bandPassSize, integrationLength });
}
@ -133,7 +132,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
@Test( dataProvider = "BandPassComposition")
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
final int start = 1;
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
// add a buffer so that we can get all of the band pass values