Renaming IncrementalActivityProfile to ActivityProfile
-- Also adding a work in progress functionality to make it easy to visualize activity profiles and active regions in IGV
This commit is contained in:
parent
e917f56df8
commit
8e8126506b
|
|
@ -77,7 +77,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
|
|
||||||
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||||
private GenomeLoc spanOfLastReadSeen = null;
|
private GenomeLoc spanOfLastReadSeen = null;
|
||||||
private IncrementalActivityProfile activityProfile = null;
|
private ActivityProfile activityProfile = null;
|
||||||
int maxReadsInMemory = 0;
|
int maxReadsInMemory = 0;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -94,7 +94,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||||
walkerHasPresetRegions = arWalker.hasPresetActiveRegions();
|
walkerHasPresetRegions = arWalker.hasPresetActiveRegions();
|
||||||
|
|
||||||
activityProfile = new IncrementalActivityProfile(engine.getGenomeLocParser());
|
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser());
|
||||||
if ( walkerHasPresetRegions ) {
|
if ( walkerHasPresetRegions ) {
|
||||||
// we load all of the preset locations into the
|
// we load all of the preset locations into the
|
||||||
for ( final GenomeLoc loc : arWalker.getPresetActiveRegions()) {
|
for ( final GenomeLoc loc : arWalker.getPresetActiveRegions()) {
|
||||||
|
|
@ -349,6 +349,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
if ( ! walkerHasPresetRegions ) {
|
if ( ! walkerHasPresetRegions ) {
|
||||||
activityProfile.add(state);
|
activityProfile.add(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( walker.activityProfileOutStream != null )
|
||||||
|
walker.activityProfileOutStream.printf("%s\t%d\t%d\t%.2f%n",
|
||||||
|
locus.getLocation().getContig(), locus.getLocation().getStart(), locus.getLocation().getStart(),
|
||||||
|
state.isActiveProb);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -375,7 +381,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
// We don't have preset regions, so we get our regions from the activity profile
|
// We don't have preset regions, so we get our regions from the activity profile
|
||||||
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile);
|
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile);
|
||||||
workQueue.addAll(activeRegions);
|
workQueue.addAll(activeRegions);
|
||||||
if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( walker.activeRegionOutStream != null ) {
|
if ( walker.activeRegionOutStream != null ) {
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,8 @@ import java.util.*;
|
||||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||||
@RemoveProgramRecords
|
@RemoveProgramRecords
|
||||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
|
@Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false)
|
||||||
|
public PrintStream activityProfileOutStream = null;
|
||||||
|
|
||||||
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
||||||
public PrintStream activeRegionOutStream = null;
|
public PrintStream activeRegionOutStream = null;
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,11 @@
|
||||||
package org.broadinstitute.sting.utils.activeregion;
|
package org.broadinstitute.sting.utils.activeregion;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* Describes how a read relates to an assigned ActiveRegion
|
||||||
|
*
|
||||||
* User: thibault
|
* User: thibault
|
||||||
* Date: 11/26/12
|
* Date: 11/26/12
|
||||||
* Time: 2:35 PM
|
* Time: 2:35 PM
|
||||||
*
|
|
||||||
* Describes how a read relates to an assigned ActiveRegion
|
|
||||||
*/
|
*/
|
||||||
public enum ActiveRegionReadState {
|
public enum ActiveRegionReadState {
|
||||||
PRIMARY, // This is the read's primary region
|
PRIMARY, // This is the read's primary region
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ import java.util.*;
|
||||||
* @author Mark DePristo
|
* @author Mark DePristo
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class IncrementalActivityProfile {
|
public class ActivityProfile {
|
||||||
private final static int MAX_PROB_PROPOGATION_DISTANCE = 10;
|
private final static int MAX_PROB_PROPOGATION_DISTANCE = 10;
|
||||||
private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
|
private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
|
||||||
|
|
||||||
|
|
@ -50,10 +50,10 @@ public class IncrementalActivityProfile {
|
||||||
protected GenomeLoc regionStopLoc = null;
|
protected GenomeLoc regionStopLoc = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new empty IncrementalActivityProfile
|
* Create a new empty ActivityProfile
|
||||||
* @param parser the parser we can use to create genome locs, cannot be null
|
* @param parser the parser we can use to create genome locs, cannot be null
|
||||||
*/
|
*/
|
||||||
public IncrementalActivityProfile(final GenomeLocParser parser) {
|
public ActivityProfile(final GenomeLocParser parser) {
|
||||||
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
|
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
|
||||||
|
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
|
|
@ -79,7 +79,7 @@ public class IncrementalActivityProfile {
|
||||||
* @return a positive integer distance in bp
|
* @return a positive integer distance in bp
|
||||||
*/
|
*/
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public int getMaxProbPropogationDistance() {
|
public int getMaxProbPropagationDistance() {
|
||||||
return MAX_PROB_PROPOGATION_DISTANCE;
|
return MAX_PROB_PROPOGATION_DISTANCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -377,6 +377,6 @@ public class IncrementalActivityProfile {
|
||||||
}
|
}
|
||||||
|
|
||||||
// we're one past the end, so i must be decremented
|
// we're one past the end, so i must be decremented
|
||||||
return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? i - 1 : -1;
|
return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -30,7 +30,8 @@ import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created with IntelliJ IDEA.
|
* The state of an active region walker's isActive call at a specific locus in the genome
|
||||||
|
*
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: 7/27/12
|
* Date: 7/27/12
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ import java.util.LinkedList;
|
||||||
* @author Mark DePristo
|
* @author Mark DePristo
|
||||||
* @since 2011
|
* @since 2011
|
||||||
*/
|
*/
|
||||||
public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile {
|
public class BandPassActivityProfile extends ActivityProfile {
|
||||||
public static final int DEFAULT_FILTER_SIZE = 80;
|
public static final int DEFAULT_FILTER_SIZE = 80;
|
||||||
|
|
||||||
private final int filterSize;
|
private final int filterSize;
|
||||||
|
|
@ -52,7 +52,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
|
||||||
* Create a band pass activity profile with the default band size
|
* Create a band pass activity profile with the default band size
|
||||||
* @param parser our genome loc parser
|
* @param parser our genome loc parser
|
||||||
*/
|
*/
|
||||||
public BandPassIncrementalActivityProfile(final GenomeLocParser parser) {
|
public BandPassActivityProfile(final GenomeLocParser parser) {
|
||||||
this(parser, DEFAULT_FILTER_SIZE);
|
this(parser, DEFAULT_FILTER_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
|
||||||
* side that are included in the band. So a filter size of 1 implies that the actual band
|
* side that are included in the band. So a filter size of 1 implies that the actual band
|
||||||
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
|
* is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc.
|
||||||
*/
|
*/
|
||||||
public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) {
|
public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) {
|
||||||
super(parser);
|
super(parser);
|
||||||
|
|
||||||
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
|
if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize);
|
||||||
|
|
@ -77,6 +77,19 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi
|
||||||
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
|
this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Our maximize propagation distance is whatever our parent's is, plus our filter size
|
||||||
|
*
|
||||||
|
* Stops the profile from interpreting sites that aren't yet fully determined due to
|
||||||
|
* propagation of the probabilities.
|
||||||
|
*
|
||||||
|
* @return the distance in bp we might move our probabilities around for some site i
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getMaxProbPropagationDistance() {
|
||||||
|
return super.getMaxProbPropagationDistance() + filterSize;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the size (in bp) of the band pass filter
|
* Get the size (in bp) of the band pass filter
|
||||||
* @return a positive integer
|
* @return a positive integer
|
||||||
|
|
@ -45,7 +45,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class IncrementalActivityProfileUnitTest extends BaseTest {
|
public class ActivityProfileUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
private GenomeLoc startLoc;
|
private GenomeLoc startLoc;
|
||||||
|
|
||||||
|
|
@ -82,12 +82,12 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions));
|
return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions));
|
||||||
}
|
}
|
||||||
|
|
||||||
public IncrementalActivityProfile makeProfile() {
|
public ActivityProfile makeProfile() {
|
||||||
switch ( type ) {
|
switch ( type ) {
|
||||||
case Base: return new IncrementalActivityProfile(genomeLocParser);
|
case Base: return new ActivityProfile(genomeLocParser);
|
||||||
case BandPass:
|
case BandPass:
|
||||||
// zero size => equivalent to IncrementalActivityProfile
|
// zero size => equivalent to ActivityProfile
|
||||||
return new BandPassIncrementalActivityProfile(genomeLocParser, 0);
|
return new BandPassActivityProfile(genomeLocParser, 0);
|
||||||
default: throw new IllegalStateException(type.toString());
|
default: throw new IllegalStateException(type.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -125,7 +125,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(dataProvider = "BasicActivityProfileTestProvider")
|
@Test(dataProvider = "BasicActivityProfileTestProvider")
|
||||||
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
|
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
|
||||||
IncrementalActivityProfile profile = cfg.makeProfile();
|
ActivityProfile profile = cfg.makeProfile();
|
||||||
|
|
||||||
Assert.assertTrue(profile.isEmpty());
|
Assert.assertTrue(profile.isEmpty());
|
||||||
|
|
||||||
|
|
@ -228,7 +228,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "RegionCreationTests")
|
@Test(enabled = true, dataProvider = "RegionCreationTests")
|
||||||
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
|
public void testRegionCreation(final int start, final List<Boolean> probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) {
|
||||||
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser);
|
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||||
Assert.assertNotNull(profile.toString());
|
Assert.assertNotNull(profile.toString());
|
||||||
|
|
||||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||||
|
|
@ -253,7 +253,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( int i = 0; i < probs.size(); i++ ) {
|
for ( int i = 0; i < probs.size(); i++ ) {
|
||||||
if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size()))
|
if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size()))
|
||||||
// only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
|
// only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end
|
||||||
Assert.assertTrue(seenSites.get(i), "Missed site " + i);
|
Assert.assertTrue(seenSites.get(i), "Missed site " + i);
|
||||||
}
|
}
|
||||||
|
|
@ -314,7 +314,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(dataProvider = "SoftClipsTest")
|
@Test(dataProvider = "SoftClipsTest")
|
||||||
public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) {
|
public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) {
|
||||||
final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser);
|
final ActivityProfile profile = new ActivityProfile(genomeLocParser);
|
||||||
|
|
||||||
final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength();
|
final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength();
|
||||||
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName();
|
||||||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
|
|
@ -47,7 +46,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
|
public class BandPassActivityProfileUnitTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
|
|
@ -80,7 +79,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(dataProvider = "BandPassBasicTest")
|
@Test(dataProvider = "BandPassBasicTest")
|
||||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
|
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) {
|
||||||
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize);
|
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
||||||
|
|
||||||
final int expectedBandSize = bandPassSize * 2 + 1;
|
final int expectedBandSize = bandPassSize * 2 + 1;
|
||||||
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
|
Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size");
|
||||||
|
|
@ -103,7 +102,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) {
|
private double[] bandPassInOnePass(final BandPassActivityProfile profile, final double[] activeProbArray) {
|
||||||
final double[] bandPassProbArray = new double[activeProbArray.length];
|
final double[] bandPassProbArray = new double[activeProbArray.length];
|
||||||
|
|
||||||
// apply the band pass filter for activeProbArray into filteredProbArray
|
// apply the band pass filter for activeProbArray into filteredProbArray
|
||||||
|
|
@ -121,7 +120,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
public Object[][] makeBandPassComposition() {
|
public Object[][] makeBandPassComposition() {
|
||||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||||
|
|
||||||
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) {
|
for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) {
|
||||||
for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) {
|
for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) {
|
||||||
tests.add(new Object[]{ bandPassSize, integrationLength });
|
tests.add(new Object[]{ bandPassSize, integrationLength });
|
||||||
}
|
}
|
||||||
|
|
@ -133,7 +132,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest {
|
||||||
@Test( dataProvider = "BandPassComposition")
|
@Test( dataProvider = "BandPassComposition")
|
||||||
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
||||||
final int start = 1;
|
final int start = 1;
|
||||||
final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize);
|
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize);
|
||||||
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
||||||
|
|
||||||
// add a buffer so that we can get all of the band pass values
|
// add a buffer so that we can get all of the band pass values
|
||||||
Loading…
Reference in New Issue