diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 08de5a6aa..74ff77e4b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -57,7 +57,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -74,12 +74,12 @@ public class FindCoveredIntervals extends ActiveRegionWalker { @Override // Look to see if the region has sufficient coverage - public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + public ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); + return new ActivityProfileState(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 26f2560b7..9bb04421c 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; @@ -71,7 +70,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; @@ -382,7 +381,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Override @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) - public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { + public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { @@ -391,15 +390,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return new ActivityProfileResult(ref.getLocus(), 1.0); + return new ActivityProfileState(ref.getLocus(), 1.0); } } if( USE_ALLELES_TRIGGER ) { - return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileState( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); } - if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); } + if( context == null ) { return new ActivityProfileState(ref.getLocus(), 0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); @@ -436,7 +435,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); - return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); + return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index a7e4d7649..de0bfd1f1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -41,7 +41,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -71,6 +72,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine myReads = new LinkedList(); private GenomeLoc spanOfLastReadSeen = null; + @Override + public void initialize(GenomeAnalysisEngine engine, Walker walker, ProgressMeter progressMeter) { + super.initialize(engine, walker, progressMeter); + + final ActiveRegionWalker arWalker = (ActiveRegionWalker)walker; + if ( arWalker.wantsExtendedReads() && ! arWalker.wantsNonPrimaryReads() ) { + throw new IllegalArgumentException("Active region walker " + arWalker + " requested extended events but not " + + "non-primary reads, an inconsistent state. Please modify the walker"); + } + + activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); + maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); + walkerHasPresetRegions = arWalker.hasPresetActiveRegions(); + } + + // ------------------------------------------------------------------------------------- + // + // Utility functions + // + // ------------------------------------------------------------------------------------- + protected int getActiveRegionExtension() { return activeRegionExtension; } @@ -97,19 +120,6 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, + final LocusShardDataProvider dataProvider, + final LocusView locusView) { + if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) + return new ManagingReferenceOrderedView( dataProvider ); + else + return (RodLocusView)locusView; + } + + + // ------------------------------------------------------------------------------------- + // + // Working with ActivityProfiles and Active Regions + // + // ------------------------------------------------------------------------------------- + /** * Take the individual isActive calls and integrate them into contiguous active regions and * add these blocks of work to the work queue @@ -133,28 +159,26 @@ public class TraverseActiveRegions extends TraversalEngine walker, + protected final ActivityProfileState walkerActiveProb(final ActiveRegionWalker walker, final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext locus, final GenomeLoc location) { - if ( walker.hasPresetActiveRegions() ) { - return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); + if ( walkerHasPresetRegions ) { + return new ActivityProfileState(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); } else { return walker.isActive( tracker, refContext, locus ); } } - protected ReferenceOrderedView getReferenceOrderedView(final ActiveRegionWalker walker, - final LocusShardDataProvider dataProvider, - final LocusView locusView) { - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - return new ManagingReferenceOrderedView( dataProvider ); + private ActivityProfile makeNewActivityProfile() { + if ( walkerHasPresetRegions ) + return new ActivityProfile(engine.getGenomeLocParser()); else - return (RodLocusView)locusView; + return new BandPassActivityProfile(engine.getGenomeLocParser()); } /** @@ -171,6 +195,12 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = new LinkedList(); - ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); + ActivityProfile profile = makeNewActivityProfile(); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); @@ -245,9 +269,8 @@ public class TraverseActiveRegions extends TraversalEngine reads = locusView.getLIBS().transferReadsFromAllPreviousPileups(); for( final GATKSAMRecord read : reads ) { if ( appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index f937c2458..820100f7f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -114,7 +114,7 @@ public abstract class ActiveRegionWalker extends Walker= 0.0", "result.isActiveProb <= 1.0"}) - public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); + public abstract ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); // Map over the ActiveRegion public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 909d99424..fd05ddd7b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -1,35 +1,34 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; import java.util.ArrayList; import java.util.Collections; @@ -43,36 +42,37 @@ import java.util.List; * @since Date created */ public class ActivityProfile { - final GenomeLocParser parser; - final boolean presetRegions; - GenomeLoc regionStartLoc = null; - GenomeLoc regionStopLoc = null; - final List isActiveList; - private static final int FILTER_SIZE = 80; - private static final double[] GaussianKernel; + private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author - static { - GaussianKernel = new double[2*FILTER_SIZE + 1]; - for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); - } + protected final List isActiveList; + protected final GenomeLocParser parser; + + protected GenomeLoc regionStartLoc = null; + protected GenomeLoc regionStopLoc = null; + + public ActivityProfile(final GenomeLocParser parser) { + this(parser, new ArrayList(), null); } - // todo -- add upfront the start and stop of the intervals - // todo -- check that no regions are unexpectedly missing - // todo -- add unit tests - // TODO -- own preset regions - public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { - this(parser, presetRegions, new ArrayList(), null); - } - - protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + protected ActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { this.parser = parser; - this.presetRegions = presetRegions; this.isActiveList = isActiveList; this.regionStartLoc = regionStartLoc; } + /** + * Create a profile of the same class as this object containing just the provided isActiveList + * + * Used by clients to create derived activity profiles (such as ones without the starting X + * sites because they've been removed in an ActiveRegion) of the same class. + * + * @param isActiveList the active results list to use in the derived instance + * @return a freshly allocated data set + */ + protected ActivityProfile createDerivedProfile(final List isActiveList) { + return new ActivityProfile(parser, isActiveList, regionStartLoc); + } + @Override public String toString() { return "ActivityProfile{" + @@ -82,14 +82,14 @@ public class ActivityProfile { } /** - * Add the next ActivityProfileResult to this profile. + * Add the next ActivityProfileState to this profile. * * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown * - * @param result a well-formed ActivityProfileResult result to incorporate into this profile + * @param result a well-formed ActivityProfileState result to incorporate into this profile */ @Requires("result != null") - public void add(final ActivityProfileResult result) { + public void add(final ActivityProfileState result) { final GenomeLoc loc = result.getLoc(); if ( regionStartLoc == null ) { @@ -104,31 +104,67 @@ public class ActivityProfile { isActiveList.add(result); } + /** + * How many profile results are in this profile? + * @return the number of profile results + */ + @Ensures("result >= 0") public int size() { return isActiveList.size(); } + /** + * Is this profile empty? + * @return true if the profile is empty + */ + @Ensures("isEmpty() == (size() == 0)") public boolean isEmpty() { return isActiveList.isEmpty(); } - public boolean hasPresetRegions() { - return presetRegions; + /** + * Get the list of active profile results in this object + * @return a non-null, ordered list of active profile results + */ + @Ensures("result != null") + protected List getActiveList() { + return isActiveList; } /** - * Band pass this ActivityProfile, producing a new profile that's band pass filtered - * @return a new ActivityProfile that's the band-pass filtered version of this profile + * Finalize the probabilities in this activity profile, preparing it for a future + * call to createActiveRegions. This function returns a new profile with cleaned + * up activity estimates. + * + * This code looks at the current list of states, cleans them up, and then returns + * a newly allocated ActivityProfile + * + * @return a newly allocated ActivityProfile based on the current state of this + * profile, but that has been "finalized" as required by the profile implementation */ - public ActivityProfile bandPassFilter() { - final double[] activeProbArray = new double[isActiveList.size()]; + public ActivityProfile finalizeProfile() { int iii = 0; - for( final ActivityProfileResult result : isActiveList ) { + for( final double prob : finalizeProbabilities() ) { + final ActivityProfileState result = isActiveList.get(iii++); + result.isActiveProb = prob; + result.resultState = ActivityProfileState.Type.NONE; + result.resultValue = null; + } + + return createDerivedProfile(isActiveList); + } + + public double[] finalizeProbabilities() { + final double[] activeProbArray = new double[isActiveList.size()]; + + int iii = 0; + for( final ActivityProfileState result : isActiveList ) { activeProbArray[iii++] = result.isActiveProb; } + iii = 0; - for( final ActivityProfileResult result : isActiveList ) { - if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + for( final ActivityProfileState result : isActiveList ) { + if( result.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups final int numHQClips = result.resultValue.intValue(); for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); @@ -137,29 +173,7 @@ public class ActivityProfile { iii++; } - final double[] filteredProbArray; - if( !presetRegions ) { - // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray - filteredProbArray = new double[activeProbArray.length]; - for( iii = 0; iii < activeProbArray.length; iii++ ) { - final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); - filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); - } - } else { - // otherwise we simply use the activeProbArray directly - filteredProbArray = activeProbArray; - } - - iii = 0; - for( final double prob : filteredProbArray ) { - final ActivityProfileResult result = isActiveList.get(iii++); - result.isActiveProb = prob; - result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; - result.resultValue = null; - } - - return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); + return activeProbArray; } /** @@ -168,7 +182,6 @@ public class ActivityProfile { * @return the list of active regions */ public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { - final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author final ArrayList returnList = new ArrayList(); if( isActiveList.size() == 0 ) { @@ -203,11 +216,11 @@ public class ActivityProfile { * @param activeRegionExtension the amount of margin overlap in the active region * @return a fully initialized ActiveRegion with the above properties */ - private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { + private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); } - private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { + private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { if( !isActive || curEnd - curStart < maxRegionSize ) { final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension)); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java rename to public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index bf2636465..38e89b605 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -34,40 +34,40 @@ import org.broadinstitute.sting.utils.GenomeLoc; * User: rpoplin * Date: 7/27/12 */ -public class ActivityProfileResult { +public class ActivityProfileState { private GenomeLoc loc; public double isActiveProb; - public ActivityProfileResultState resultState; + public Type resultState; public Number resultValue; - public enum ActivityProfileResultState { + public enum Type { NONE, HIGH_QUALITY_SOFT_CLIPS } /** - * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb + * Create a new ActivityProfileState at loc with probability of being active of isActiveProb * * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) */ @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) - public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) { - this(loc, isActiveProb, ActivityProfileResultState.NONE, null); + public ActivityProfileState(final GenomeLoc loc, final double isActiveProb) { + this(loc, isActiveProb, Type.NONE, null); } /** - * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some + * Create a new ActivityProfileState at loc with probability of being active of isActiveProb that maintains some * information about the result state and value (TODO RYAN -- what do these mean?) * * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) */ @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) - public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + public ActivityProfileState(final GenomeLoc loc, final double isActiveProb, final Type resultState, final Number resultValue) { // make sure the location of that activity profile is 1 if ( loc.size() != 1 ) - throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc); + throw new IllegalArgumentException("Location for an ActivityProfileState must have to size 1 bp but saw " + loc); this.loc = loc; this.isActiveProb = isActiveProb; @@ -76,7 +76,7 @@ public class ActivityProfileResult { } /** - * Get the genome loc associated with the ActivityProfileResult + * Get the genome loc associated with the ActivityProfileState * @return the location of this result */ @Ensures("result != null") @@ -86,7 +86,7 @@ public class ActivityProfileResult { @Override public String toString() { - return "ActivityProfileResult{" + + return "ActivityProfileState{" + "loc=" + loc + ", isActiveProb=" + isActiveProb + ", resultState=" + resultState + diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java new file mode 100644 index 000000000..cef700419 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -0,0 +1,84 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.activeregion; + +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; + +import java.util.ArrayList; +import java.util.List; + +/** + * + * + * @author Mark DePristo + * @since 2011 + */ +public class BandPassActivityProfile extends ActivityProfile { + private static final int FILTER_SIZE = 80; + private static final double[] GaussianKernel; + + static { + GaussianKernel = new double[2*FILTER_SIZE + 1]; + for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); + } + } + + public BandPassActivityProfile(final GenomeLocParser parser) { + this(parser, new ArrayList(), null); + } + + public BandPassActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { + super(parser, isActiveList, regionStartLoc); + } + + @Override + protected ActivityProfile createDerivedProfile(List isActiveList) { + return new BandPassActivityProfile(parser, isActiveList, regionStartLoc); + } + + /** + * Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered + * @return a new double[] that's the band-pass filtered version of this profile + */ + @Override + public double[] finalizeProbabilities() { + final double[] activeProbArray = super.finalizeProbabilities(); + final double[] bandPassProbArray = new double[activeProbArray.length]; + + // apply the band pass filter for activeProbArray into filteredProbArray + for( int iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); + bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); + } + + return bandPassProbArray; + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java index 76be54d72..f09a4b3e8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import java.util.*; @@ -80,10 +80,10 @@ class DummyActiveRegionWalker extends ActiveRegionWalker { } @Override - public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { isActiveCalls.add(ref.getLocus()); final double p = activeRegions == null || activeRegions.overlaps(ref.getLocus()) ? prob : 0.0; - return new ActivityProfileResult(ref.getLocus(), p); + return new ActivityProfileState(ref.getLocus(), p); } @Override diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java similarity index 79% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java index d131666fa..019cf82da 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java @@ -25,23 +25,17 @@ package org.broadinstitute.sting.utils.activeregion; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; -import java.util.EnumSet; import java.util.LinkedList; import java.util.List; @@ -52,7 +46,7 @@ import java.util.List; * Time: 2:30 PM * To change this template use File | Settings | File Templates. */ -public class ActivityProfileResultTest { +public class ActivityProfileStateUnitTest { private GenomeLocParser genomeLocParser; @BeforeClass @@ -71,7 +65,7 @@ public class ActivityProfileResultTest { genomeLocParser.createGenomeLoc(chr, 10, 10), genomeLocParser.createGenomeLoc(chr, 100, 100) )) { for ( final double prob : Arrays.asList(0.0, 0.5, 1.0) ) { - for ( final ActivityProfileResult.ActivityProfileResultState state : ActivityProfileResult.ActivityProfileResultState.values() ) { + for ( final ActivityProfileState.Type state : ActivityProfileState.Type.values() ) { for ( final Number value : Arrays.asList(1, 2, 4) ) { tests.add(new Object[]{ loc, prob, state, value}); } @@ -84,15 +78,15 @@ public class ActivityProfileResultTest { } @Test(dataProvider = "ActiveProfileResultProvider") - public void testActiveProfileResultProvider(GenomeLoc loc, final double prob, ActivityProfileResult.ActivityProfileResultState maybeState, final Number maybeNumber) { - final ActivityProfileResult apr = maybeState == null - ? new ActivityProfileResult(loc, prob) - : new ActivityProfileResult(loc, prob, maybeState, maybeNumber); + public void testActiveProfileResultProvider(GenomeLoc loc, final double prob, ActivityProfileState.Type maybeState, final Number maybeNumber) { + final ActivityProfileState apr = maybeState == null + ? new ActivityProfileState(loc, prob) + : new ActivityProfileState(loc, prob, maybeState, maybeNumber); Assert.assertEquals(apr.getLoc(), loc); Assert.assertNotNull(apr.toString()); Assert.assertEquals(apr.isActiveProb, prob); - Assert.assertEquals(apr.resultState, maybeState == null ? ActivityProfileResult.ActivityProfileResultState.NONE : maybeState); + Assert.assertEquals(apr.resultState, maybeState == null ? ActivityProfileState.Type.NONE : maybeState); Assert.assertEquals(apr.resultValue, maybeState == null ? null : maybeNumber); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index ff27037d3..430e0b5c6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -42,9 +42,7 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; public class ActivityProfileUnitTest extends BaseTest { @@ -70,23 +68,26 @@ public class ActivityProfileUnitTest extends BaseTest { List expectedRegions; int extension = 0; GenomeLoc regionStart = startLoc; + final ProfileType type; - public BasicActivityProfileTestProvider(final List probs, final List expectedRegions) { - super(BasicActivityProfileTestProvider.class); - this.probs = probs; - this.expectedRegions = expectedRegions; - setName(getName()); - } - - public BasicActivityProfileTestProvider(final List probs, boolean startActive, int ... startsAndStops) { + public BasicActivityProfileTestProvider(final ProfileType type, final List probs, boolean startActive, int ... startsAndStops) { super(BasicActivityProfileTestProvider.class); + this.type = type; this.probs = probs; this.expectedRegions = toRegions(startActive, startsAndStops); setName(getName()); } private String getName() { - return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions)); + return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); + } + + public ActivityProfile makeProfile() { + switch ( type ) { + case Base: return new ActivityProfile(genomeLocParser); + case BandPass: return new BandPassActivityProfile(genomeLocParser); + default: throw new IllegalStateException(type.toString()); + } } private List toRegions(boolean isActive, int[] startsAndStops) { @@ -103,27 +104,36 @@ public class ActivityProfileUnitTest extends BaseTest { } } + private enum ProfileType { + Base, BandPass + } + @DataProvider(name = "BasicActivityProfileTestProvider") public Object[][] makeQualIntervalTestProvider() { - new BasicActivityProfileTestProvider(Arrays.asList(1.0), true, 0, 1); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0), true, 0, 1, 2); - new BasicActivityProfileTestProvider(Arrays.asList(0.0, 1.0), false, 0, 1, 2); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + for ( final ProfileType type : ProfileType.values() ) { + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + } return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); } @Test(dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - ActivityProfile profile = new ActivityProfile(genomeLocParser, false); + ActivityProfile profile = cfg.makeProfile(); + + Assert.assertTrue(profile.isEmpty()); Assert.assertEquals(profile.parser, genomeLocParser); for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(new ActivityProfileResult(loc, p)); + profile.add(new ActivityProfileState(loc, p)); + Assert.assertFalse(profile.isEmpty()); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); @@ -131,6 +141,11 @@ public class ActivityProfileUnitTest extends BaseTest { assertProbsAreEqual(profile.isActiveList, cfg.probs); assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); + + Assert.assertEquals(profile.createDerivedProfile(profile.isActiveList).getClass(), profile.getClass()); + + final List empty = new LinkedList(); + Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); } private void assertRegionsAreEqual(List actual, List expected) { @@ -140,7 +155,7 @@ public class ActivityProfileUnitTest extends BaseTest { } } - private void assertProbsAreEqual(List actual, List expected) { + private void assertProbsAreEqual(List actual, List expected) { Assert.assertEquals(actual.size(), expected.size()); for ( int i = 0; i < actual.size(); i++ ) { Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));