Remove old ActivityProfile and old BandPassActivityProfile

This commit is contained in:
Mark DePristo 2013-01-22 16:02:09 -05:00
parent 7fd27a5167
commit e917f56df8
3 changed files with 0 additions and 493 deletions

View File

@ -1,243 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.activeregion;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Class holding information about per-base activity scores for the
* active region traversal
*
* @author Mark DePristo
* @since Date created
*/
public class ActivityProfile {
private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
protected final List<ActivityProfileState> isActiveList;
protected final GenomeLocParser parser;
protected GenomeLoc regionStartLoc = null;
protected GenomeLoc regionStopLoc = null;
public ActivityProfile(final GenomeLocParser parser) {
this(parser, new ArrayList<ActivityProfileState>(), null);
}
protected ActivityProfile(final GenomeLocParser parser, final List<ActivityProfileState> isActiveList, final GenomeLoc regionStartLoc) {
this.parser = parser;
this.isActiveList = isActiveList;
this.regionStartLoc = regionStartLoc;
}
/**
* Create a profile of the same class as this object containing just the provided stateList
*
* Used by clients to create derived activity profiles (such as ones without the starting X
* sites because they've been removed in an ActiveRegion) of the same class.
*
* @param isActiveList the active results list to use in the derived instance
* @return a freshly allocated data set
*/
protected ActivityProfile createDerivedProfile(final List<ActivityProfileState> isActiveList) {
return new ActivityProfile(parser, isActiveList, regionStartLoc);
}
@Override
public String toString() {
return "ActivityProfile{" +
"start=" + regionStartLoc +
", stop=" + regionStopLoc +
'}';
}
/**
* Add the next ActivityProfileState to this profile.
*
* Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown
*
* @param result a well-formed ActivityProfileState result to incorporate into this profile
*/
@Requires("result != null")
public void add(final ActivityProfileState result) {
final GenomeLoc loc = result.getLoc();
if ( regionStartLoc == null ) {
regionStartLoc = loc;
regionStopLoc = loc;
} else {
if ( regionStopLoc.getStart() != loc.getStart() - 1 )
throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc );
regionStopLoc = loc;
}
isActiveList.add(result);
}
/**
* How many profile results are in this profile?
* @return the number of profile results
*/
@Ensures("result >= 0")
public int size() {
return isActiveList.size();
}
/**
* Is this profile empty?
* @return true if the profile is empty
*/
@Ensures("isEmpty() == (size() == 0)")
public boolean isEmpty() {
return isActiveList.isEmpty();
}
/**
* Get the list of active profile results in this object
* @return a non-null, ordered list of active profile results
*/
@Ensures("result != null")
protected List<ActivityProfileState> getActiveList() {
return isActiveList;
}
/**
* Finalize the probabilities in this activity profile, preparing it for a future
* call to createActiveRegions. This function returns a new profile with cleaned
* up activity estimates.
*
* This code looks at the current list of states, cleans them up, and then returns
* a newly allocated ActivityProfile
*
* @return a newly allocated ActivityProfile based on the current state of this
* profile, but that has been "finalized" as required by the profile implementation
*/
public ActivityProfile finalizeProfile() {
int iii = 0;
for( final double prob : finalizeProbabilities() ) {
final ActivityProfileState result = isActiveList.get(iii++);
result.isActiveProb = prob;
result.resultState = ActivityProfileState.Type.NONE;
result.resultValue = null;
}
return createDerivedProfile(isActiveList);
}
public double[] finalizeProbabilities() {
final double[] activeProbArray = new double[isActiveList.size()];
int iii = 0;
for( final ActivityProfileState result : isActiveList ) {
activeProbArray[iii++] = result.isActiveProb;
}
iii = 0;
for( final ActivityProfileState result : isActiveList ) {
if( result.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
final int numHQClips = result.resultValue.intValue();
for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) {
activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
}
}
iii++;
}
return activeProbArray;
}
/**
* Partition this profile into active regions
* @param activeRegionExtension the amount of margin overlap in the active region
* @return the list of active regions
*/
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
if( isActiveList.size() == 0 ) {
// no elements in the active list, just return an empty one
return Collections.emptyList();
} else if( isActiveList.size() == 1 ) {
// there's a single element, it's either active or inactive
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
} else {
// there are 2+ elements, divide these up into regions
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
int curStart = 0;
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD;
if( isActive != thisStatus ) {
returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
isActive = thisStatus;
curStart = iii;
}
}
returnList.addAll(createActiveRegion(isActive, curStart, isActiveList.size() - 1, activeRegionExtension, maxRegionSize)); // close out the current active region
}
return returnList;
}
/**
* Helper routine to create an active region based on our current start and end offsets
* @param isActive should the region be active?
* @param curStart offset (0-based) from the start of this region
* @param curEnd offset (0-based) from the start of this region
* @param activeRegionExtension the amount of margin overlap in the active region
* @return a fully initialized ActiveRegion with the above properties
*/
private List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
}
private List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List<ActiveRegion> returnList) {
if( !isActive || curEnd - curStart < maxRegionSize ) {
final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension));
return returnList;
}
// find the best place to break up the large active region
Double minProb = Double.MAX_VALUE;
int cutPoint = -1;
final int size = curEnd - curStart + 1;
for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
}
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
returnList.addAll( leftList );
returnList.addAll( rightList );
return returnList;
}
}

View File

@ -1,84 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.activeregion;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
import java.util.List;
/**
*
*
* @author Mark DePristo
* @since 2011
*/
public class BandPassActivityProfile extends ActivityProfile {
private static final int FILTER_SIZE = 80;
private static final double[] GaussianKernel;
static {
GaussianKernel = new double[2*FILTER_SIZE + 1];
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
}
}
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, new ArrayList<ActivityProfileState>(), null);
}
public BandPassActivityProfile(final GenomeLocParser parser, final List<ActivityProfileState> isActiveList, final GenomeLoc regionStartLoc) {
super(parser, isActiveList, regionStartLoc);
}
@Override
protected ActivityProfile createDerivedProfile(List<ActivityProfileState> isActiveList) {
return new BandPassActivityProfile(parser, isActiveList, regionStartLoc);
}
/**
* Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered
* @return a new double[] that's the band-pass filtered version of this profile
*/
@Override
public double[] finalizeProbabilities() {
final double[] activeProbArray = super.finalizeProbabilities();
final double[] bandPassProbArray = new double[activeProbArray.length];
// apply the band pass filter for activeProbArray into filteredProbArray
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
}
return bandPassProbArray;
}
}

View File

@ -1,166 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.activeregion;
// the imports for unit testing.
import net.sf.picard.reference.ReferenceSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
public class ActivityProfileUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private GenomeLoc startLoc;
@BeforeClass
public void init() throws FileNotFoundException {
// sequence
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
genomeLocParser = new GenomeLocParser(seq);
startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100);
}
// --------------------------------------------------------------------------------
//
// Basic tests Provider
//
// --------------------------------------------------------------------------------
private class BasicActivityProfileTestProvider extends TestDataProvider {
List<Double> probs;
List<ActiveRegion> expectedRegions;
int extension = 0;
GenomeLoc regionStart = startLoc;
final ProfileType type;
public BasicActivityProfileTestProvider(final ProfileType type, final List<Double> probs, boolean startActive, int ... startsAndStops) {
super(BasicActivityProfileTestProvider.class);
this.type = type;
this.probs = probs;
this.expectedRegions = toRegions(startActive, startsAndStops);
setName(getName());
}
private String getName() {
return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions));
}
public ActivityProfile makeProfile() {
switch ( type ) {
case Base: return new ActivityProfile(genomeLocParser);
case BandPass: return new BandPassActivityProfile(genomeLocParser);
default: throw new IllegalStateException(type.toString());
}
}
private List<ActiveRegion> toRegions(boolean isActive, int[] startsAndStops) {
List<ActiveRegion> l = new ArrayList<ActiveRegion>();
for ( int i = 0; i < startsAndStops.length - 1; i++) {
int start = regionStart.getStart() + startsAndStops[i];
int end = regionStart.getStart() + startsAndStops[i+1] - 1;
GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end);
ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension);
l.add(r);
isActive = ! isActive;
}
return l;
}
}
private enum ProfileType {
Base, BandPass
}
@DataProvider(name = "BasicActivityProfileTestProvider")
public Object[][] makeQualIntervalTestProvider() {
for ( final ProfileType type : ProfileType.values() ) {
new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1);
new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2);
new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2);
new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3);
new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3);
}
return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class);
}
@Test(dataProvider = "BasicActivityProfileTestProvider")
public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) {
ActivityProfile profile = cfg.makeProfile();
Assert.assertTrue(profile.isEmpty());
Assert.assertEquals(profile.parser, genomeLocParser);
for ( int i = 0; i < cfg.probs.size(); i++ ) {
double p = cfg.probs.get(i);
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
profile.add(new ActivityProfileState(loc, p));
Assert.assertFalse(profile.isEmpty());
}
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
Assert.assertEquals(profile.size(), cfg.probs.size());
assertProbsAreEqual(profile.isActiveList, cfg.probs);
assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
Assert.assertEquals(profile.createDerivedProfile(profile.isActiveList).getClass(), profile.getClass());
final List<ActivityProfileState> empty = new LinkedList<ActivityProfileState>();
Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0);
}
private void assertRegionsAreEqual(List<ActiveRegion> actual, List<ActiveRegion> expected) {
Assert.assertEquals(actual.size(), expected.size());
for ( int i = 0; i < actual.size(); i++ ) {
Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i)));
}
}
private void assertProbsAreEqual(List<ActivityProfileState> actual, List<Double> expected) {
Assert.assertEquals(actual.size(), expected.size());
for ( int i = 0; i < actual.size(); i++ ) {
Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));
}
}
// todo -- test extensions
}