diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java deleted file mode 100644 index 8d6012fac..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.activeregion; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Class holding information about per-base activity scores for the - * active region traversal - * - * @author Mark DePristo - * @since Date created - */ -public class ActivityProfile { - private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author - - protected final List isActiveList; - protected final GenomeLocParser parser; - - protected GenomeLoc regionStartLoc = null; - protected GenomeLoc regionStopLoc = null; - - public ActivityProfile(final GenomeLocParser parser) { - this(parser, new ArrayList(), null); - } - - protected ActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { - this.parser = parser; - this.isActiveList = isActiveList; - this.regionStartLoc = regionStartLoc; - } - - /** - * Create a profile of the same class as this object containing just the provided stateList - * - * Used by clients to create derived activity profiles (such as ones without the starting X - * sites because they've been removed in an ActiveRegion) of the same class. - * - * @param isActiveList the active results list to use in the derived instance - * @return a freshly allocated data set - */ - protected ActivityProfile createDerivedProfile(final List isActiveList) { - return new ActivityProfile(parser, isActiveList, regionStartLoc); - } - - @Override - public String toString() { - return "ActivityProfile{" + - "start=" + regionStartLoc + - ", stop=" + regionStopLoc + - '}'; - } - - /** - * Add the next ActivityProfileState to this profile. - * - * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown - * - * @param result a well-formed ActivityProfileState result to incorporate into this profile - */ - @Requires("result != null") - public void add(final ActivityProfileState result) { - final GenomeLoc loc = result.getLoc(); - - if ( regionStartLoc == null ) { - regionStartLoc = loc; - regionStopLoc = loc; - } else { - if ( regionStopLoc.getStart() != loc.getStart() - 1 ) - throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); - regionStopLoc = loc; - } - - isActiveList.add(result); - } - - /** - * How many profile results are in this profile? - * @return the number of profile results - */ - @Ensures("result >= 0") - public int size() { - return isActiveList.size(); - } - - /** - * Is this profile empty? - * @return true if the profile is empty - */ - @Ensures("isEmpty() == (size() == 0)") - public boolean isEmpty() { - return isActiveList.isEmpty(); - } - - /** - * Get the list of active profile results in this object - * @return a non-null, ordered list of active profile results - */ - @Ensures("result != null") - protected List getActiveList() { - return isActiveList; - } - - /** - * Finalize the probabilities in this activity profile, preparing it for a future - * call to createActiveRegions. This function returns a new profile with cleaned - * up activity estimates. - * - * This code looks at the current list of states, cleans them up, and then returns - * a newly allocated ActivityProfile - * - * @return a newly allocated ActivityProfile based on the current state of this - * profile, but that has been "finalized" as required by the profile implementation - */ - public ActivityProfile finalizeProfile() { - int iii = 0; - for( final double prob : finalizeProbabilities() ) { - final ActivityProfileState result = isActiveList.get(iii++); - result.isActiveProb = prob; - result.resultState = ActivityProfileState.Type.NONE; - result.resultValue = null; - } - - return createDerivedProfile(isActiveList); - } - - public double[] finalizeProbabilities() { - final double[] activeProbArray = new double[isActiveList.size()]; - - int iii = 0; - for( final ActivityProfileState result : isActiveList ) { - activeProbArray[iii++] = result.isActiveProb; - } - - iii = 0; - for( final ActivityProfileState result : isActiveList ) { - if( result.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups - final int numHQClips = result.resultValue.intValue(); - for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { - activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); - } - } - iii++; - } - - return activeProbArray; - } - - /** - * Partition this profile into active regions - * @param activeRegionExtension the amount of margin overlap in the active region - * @return the list of active regions - */ - public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { - final ArrayList returnList = new ArrayList(); - - if( isActiveList.size() == 0 ) { - // no elements in the active list, just return an empty one - return Collections.emptyList(); - } else if( isActiveList.size() == 1 ) { - // there's a single element, it's either active or inactive - boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; - returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize)); - } else { - // there are 2+ elements, divide these up into regions - boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; - int curStart = 0; - for(int iii = 1; iii < isActiveList.size(); iii++ ) { - final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD; - if( isActive != thisStatus ) { - returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize)); - isActive = thisStatus; - curStart = iii; - } - } - returnList.addAll(createActiveRegion(isActive, curStart, isActiveList.size() - 1, activeRegionExtension, maxRegionSize)); // close out the current active region - } - return returnList; - } - - /** - * Helper routine to create an active region based on our current start and end offsets - * @param isActive should the region be active? - * @param curStart offset (0-based) from the start of this region - * @param curEnd offset (0-based) from the start of this region - * @param activeRegionExtension the amount of margin overlap in the active region - * @return a fully initialized ActiveRegion with the above properties - */ - private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { - return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); - } - - private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { - if( !isActive || curEnd - curStart < maxRegionSize ) { - final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); - returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension)); - return returnList; - } - // find the best place to break up the large active region - Double minProb = Double.MAX_VALUE; - int cutPoint = -1; - - final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { - if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } - } - final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); - final List rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); - returnList.addAll( leftList ); - returnList.addAll( rightList ); - return returnList; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java deleted file mode 100644 index cef700419..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java +++ /dev/null @@ -1,84 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils.activeregion; - -import org.apache.commons.lang.ArrayUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; - -import java.util.ArrayList; -import java.util.List; - -/** - * - * - * @author Mark DePristo - * @since 2011 - */ -public class BandPassActivityProfile extends ActivityProfile { - private static final int FILTER_SIZE = 80; - private static final double[] GaussianKernel; - - static { - GaussianKernel = new double[2*FILTER_SIZE + 1]; - for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); - } - } - - public BandPassActivityProfile(final GenomeLocParser parser) { - this(parser, new ArrayList(), null); - } - - public BandPassActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { - super(parser, isActiveList, regionStartLoc); - } - - @Override - protected ActivityProfile createDerivedProfile(List isActiveList) { - return new BandPassActivityProfile(parser, isActiveList, regionStartLoc); - } - - /** - * Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered - * @return a new double[] that's the band-pass filtered version of this profile - */ - @Override - public double[] finalizeProbabilities() { - final double[] activeProbArray = super.finalizeProbabilities(); - final double[] bandPassProbArray = new double[activeProbArray.length]; - - // apply the band pass filter for activeProbArray into filteredProbArray - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); - bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); - } - - return bandPassProbArray; - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java deleted file mode 100644 index 430e0b5c6..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ /dev/null @@ -1,166 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils.activeregion; - - -// the imports for unit testing. - - -import net.sf.picard.reference.ReferenceSequenceFile; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; - - -public class ActivityProfileUnitTest extends BaseTest { - private GenomeLocParser genomeLocParser; - private GenomeLoc startLoc; - - @BeforeClass - public void init() throws FileNotFoundException { - // sequence - ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); - genomeLocParser = new GenomeLocParser(seq); - startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100); - } - - // -------------------------------------------------------------------------------- - // - // Basic tests Provider - // - // -------------------------------------------------------------------------------- - - private class BasicActivityProfileTestProvider extends TestDataProvider { - List probs; - List expectedRegions; - int extension = 0; - GenomeLoc regionStart = startLoc; - final ProfileType type; - - public BasicActivityProfileTestProvider(final ProfileType type, final List probs, boolean startActive, int ... startsAndStops) { - super(BasicActivityProfileTestProvider.class); - this.type = type; - this.probs = probs; - this.expectedRegions = toRegions(startActive, startsAndStops); - setName(getName()); - } - - private String getName() { - return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); - } - - public ActivityProfile makeProfile() { - switch ( type ) { - case Base: return new ActivityProfile(genomeLocParser); - case BandPass: return new BandPassActivityProfile(genomeLocParser); - default: throw new IllegalStateException(type.toString()); - } - } - - private List toRegions(boolean isActive, int[] startsAndStops) { - List l = new ArrayList(); - for ( int i = 0; i < startsAndStops.length - 1; i++) { - int start = regionStart.getStart() + startsAndStops[i]; - int end = regionStart.getStart() + startsAndStops[i+1] - 1; - GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); - l.add(r); - isActive = ! isActive; - } - return l; - } - } - - private enum ProfileType { - Base, BandPass - } - - @DataProvider(name = "BasicActivityProfileTestProvider") - public Object[][] makeQualIntervalTestProvider() { - for ( final ProfileType type : ProfileType.values() ) { - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); - } - - return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); - } - - @Test(dataProvider = "BasicActivityProfileTestProvider") - public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - ActivityProfile profile = cfg.makeProfile(); - - Assert.assertTrue(profile.isEmpty()); - - Assert.assertEquals(profile.parser, genomeLocParser); - - for ( int i = 0; i < cfg.probs.size(); i++ ) { - double p = cfg.probs.get(i); - GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(new ActivityProfileState(loc, p)); - Assert.assertFalse(profile.isEmpty()); - } - Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); - - Assert.assertEquals(profile.size(), cfg.probs.size()); - assertProbsAreEqual(profile.isActiveList, cfg.probs); - - assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); - - Assert.assertEquals(profile.createDerivedProfile(profile.isActiveList).getClass(), profile.getClass()); - - final List empty = new LinkedList(); - Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); - } - - private void assertRegionsAreEqual(List actual, List expected) { - Assert.assertEquals(actual.size(), expected.size()); - for ( int i = 0; i < actual.size(); i++ ) { - Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i))); - } - } - - private void assertProbsAreEqual(List actual, List expected) { - Assert.assertEquals(actual.size(), expected.size()); - for ( int i = 0; i < actual.size(); i++ ) { - Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); - } - } - - // todo -- test extensions -} \ No newline at end of file