diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 5aba23faa..a185ba6af 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; -import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -41,7 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.genotyper.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.clipping.ReadClipper; @@ -212,7 +214,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private VariantAnnotatorEngine annotationEngine; // fasta reference reader to supplement the edges of the reference sequence - private IndexedFastaSequenceFile referenceReader; + private CachingIndexedFastaSequenceFile referenceReader; // reference base padding size private static final int REFERENCE_PADDING = 900; @@ -324,15 +326,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return new ActivityProfileResult(1.0); + return new ActivityProfileResult(ref.getLocus(), 1.0); } } if( USE_ALLELES_TRIGGER ) { - return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); } - if( context == null ) { return new ActivityProfileResult(0.0); } + if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); @@ -369,7 +371,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); - return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); + return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 86f3748ce..d00f5b61d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -69,8 +69,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939")); + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index c8bf1e3e8..34627b973 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -78,7 +78,7 @@ public class ReferenceContext { * * @return */ - @Ensures({"result != null", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null"}) public byte[] getBases(); } @@ -143,6 +143,9 @@ public class ReferenceContext { private void fetchBasesFromProvider() { if ( basesCache == null ) { basesCache = basesProvider.getBases(); + + // must be an assertion that only runs when the bases are fetch to run in a reasonable amount of time + assert BaseUtils.isUpperCase(basesCache); } } @@ -172,7 +175,6 @@ public class ReferenceContext { * Get the base at the given locus. * @return The base at the given locus from the reference. */ - @Ensures("BaseUtils.isUpperCase(result)") public byte getBase() { return getBases()[(locus.getStart() - window.getStart())]; } @@ -182,7 +184,7 @@ public class ReferenceContext { * @return All bases available. If the window is of size [0,0], the array will * contain only the base at the given locus. */ - @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null", "result.length > 0"}) public byte[] getBases() { fetchBasesFromProvider(); return basesCache; @@ -191,7 +193,7 @@ public class ReferenceContext { /** * All the bases in the window from the current base forward to the end of the window. */ - @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null", "result.length > 0"}) public byte[] getForwardBases() { final byte[] bases = getBases(); final int mid = locus.getStart() - window.getStart(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 5d38df0f5..a2c37944a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; @@ -46,99 +45,127 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = new LinkedList(); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); - ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); + ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); - // We keep processing while the next reference location is within the interval - GenomeLoc prevLoc = null; - while( locusView.hasNext() ) { - final AlignmentContext locus = locusView.next(); - GenomeLoc location = locus.getLocation(); + // We keep processing while the next reference location is within the interval + GenomeLoc prevLoc = null; + while( locusView.hasNext() ) { + final AlignmentContext locus = locusView.next(); + final GenomeLoc location = locus.getLocation(); - if(prevLoc != null) { - // fill in the active / inactive labels from the stop of the previous location to the start of this location - // TODO refactor to separate function - for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) { - final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); - if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) { - profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 )); - } - } + // Grab all the previously unseen reads from this pileup and add them to the massive read list + // Note that this must occur before we leave because we are outside the intervals because + // reads may occur outside our intervals but overlap them in the future + // TODO -- this whole HashSet logic should be changed to a linked list of reads with + // TODO -- subsequent pass over them to find the ones overlapping the active regions + for( final PileupElement p : locus.getBasePileup() ) { + final GATKSAMRecord read = p.getRead(); + if( !myReads.contains(read) ) { + myReads.add(read); } - dataProvider.getShard().getReadMetrics().incrementNumIterations(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). - final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - // Call the walkers isActive function for this locus and add them to the list to be integrated later - if( initialIntervals == null || initialIntervals.overlaps( location ) ) { - profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location)); - } - - // Grab all the previously unseen reads from this pileup and add them to the massive read list - for( final PileupElement p : locus.getBasePileup() ) { - final GATKSAMRecord read = p.getRead(); - if( !myReads.contains(read) ) { - myReads.add(read); - } - - // If this is the last pileup for this shard calculate the minimum alignment start so that we know - // which active regions in the work queue are now safe to process - minStart = Math.min(minStart, read.getAlignmentStart()); - } - - prevLoc = location; - - printProgress(locus.getLocation()); + // If this is the last pileup for this shard calculate the minimum alignment start so that we know + // which active regions in the work queue are now safe to process + minStart = Math.min(minStart, read.getAlignmentStart()); } - updateCumulativeMetrics(dataProvider.getShard()); + // skip this location -- it's not part of our engine intervals + // TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow + if ( outsideEngineIntervals(location) ) + continue; - // Take the individual isActive calls and integrate them into contiguous active regions and - // add these blocks of work to the work queue - // band-pass filter the list of isActive probabilities and turn into active regions - final ActivityProfile bandPassFiltered = profile.bandPassFilter(); - final List activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); - - // add active regions to queue of regions to process - // first check if can merge active regions over shard boundaries - if( !activeRegions.isEmpty() ) { - if( !workQueue.isEmpty() ) { - final ActiveRegion last = workQueue.getLast(); - final ActiveRegion first = activeRegions.get(0); - if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { - workQueue.removeLast(); - activeRegions.remove(first); - workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); - } - } - workQueue.addAll( activeRegions ); + if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { + // we've move across some interval boundary, restart profile + profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); } - logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + dataProvider.getShard().getReadMetrics().incrementNumIterations(); - // now go and process all of the active regions - sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); + // create reference context. Note that if we have a pileup of "extended events", the context will + // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). + final ReferenceContext refContext = referenceView.getReferenceContext(location); + + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + + // Call the walkers isActive function for this locus and add them to the list to be integrated later + profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); + + prevLoc = location; + + printProgress(locus.getLocation()); } + updateCumulativeMetrics(dataProvider.getShard()); + + if ( ! profile.isEmpty() ) + incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + + // add active regions to queue of regions to process + // first check if can merge active regions over shard boundaries + if( !activeRegions.isEmpty() ) { + if( !workQueue.isEmpty() ) { + final ActiveRegion last = workQueue.getLast(); + final ActiveRegion first = activeRegions.get(0); + if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { + workQueue.removeLast(); + activeRegions.remove(first); + workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); + } + } + workQueue.addAll( activeRegions ); + } + + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + + // now go and process all of the active regions + sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); + return sum; } + /** + * Is the loc outside of the intervals being requested for processing by the GATK? + * @param loc + * @return + */ + private boolean outsideEngineIntervals(final GenomeLoc loc) { + return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc); + } + + /** + * Take the individual isActive calls and integrate them into contiguous active regions and + * add these blocks of work to the work queue + * band-pass filter the list of isActive probabilities and turn into active regions + * + * @param profile + * @param activeRegions + * @param activeRegionExtension + * @param maxRegionSize + * @return + */ + private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, + final List activeRegions, + final int activeRegionExtension, + final int maxRegionSize) { + if ( profile.isEmpty() ) + throw new IllegalStateException("trying to incorporate an empty active profile " + profile); + + final ActivityProfile bandPassFiltered = profile.bandPassFilter(); + activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); + return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); + } + // -------------------------------------------------------------------------------- // @@ -150,7 +177,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, final LocusShardDataProvider dataProvider ) { - final DataSource dataSource = WalkerManager.getWalkerDataSource(walker); - if( dataSource == DataSource.READS ) - return new CoveredLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers ) - return new AllLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE_ORDERED_DATA ) - return new RodLocusView(dataProvider); - else - throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); - } - /** * Special function called in LinearMicroScheduler to empty out the work queue. * Ugly for now but will be cleaned up when we push this functionality more into the engine diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index e17c6cdb7..85b7159e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -57,7 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1)); + return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index decc54d47..0d12d53cc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.utils.activeregion; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.samtools.util.StringUtil; +import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -54,27 +54,31 @@ public class ActiveRegion implements HasGenomeLocation { public ArrayList getReads() { return reads; } - public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) { return getActiveRegionReference(referenceReader, 0); } - public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) { return getReference( referenceReader, padding, extendedLoc ); } - public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) { return getFullReference(referenceReader, 0); } - public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) { return getReference( referenceReader, padding, fullExtentReferenceLoc ); } - private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { + @Requires("referenceReader.isUppercasingBases()") + private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(), Math.max(1, genomeLoc.getStart() - padding), Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases(); - StringUtil.toUpperCase(reference); return reference; } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 73f3cc487..e96eb843d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -24,11 +24,11 @@ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.Collections; @@ -45,6 +45,7 @@ public class ActivityProfile { final GenomeLocParser parser; final boolean presetRegions; GenomeLoc regionStartLoc = null; + GenomeLoc regionStopLoc = null; final List isActiveList; private static final int FILTER_SIZE = 80; private static final double[] GaussianKernel; @@ -71,19 +72,49 @@ public class ActivityProfile { this.regionStartLoc = regionStartLoc; } - public void add(final GenomeLoc loc, final ActivityProfileResult result) { - if ( loc.size() != 1 ) - throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); - isActiveList.add(result); - if( regionStartLoc == null ) { + @Override + public String toString() { + return "ActivityProfile{" + + "start=" + regionStartLoc + + ", stop=" + regionStopLoc + + '}'; + } + + /** + * Add the next ActivityProfileResult to this profile. + * + * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown + * + * @param result a well-formed ActivityProfileResult result to incorporate into this profile + */ + @Requires("result != null") + public void add(final ActivityProfileResult result) { + final GenomeLoc loc = result.getLoc(); + + if ( regionStartLoc == null ) { regionStartLoc = loc; + regionStopLoc = loc; + } else { + if ( regionStopLoc.getStart() != loc.getStart() - 1 ) + throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); + regionStopLoc = loc; } + + isActiveList.add(result); } public int size() { return isActiveList.size(); } + public boolean isEmpty() { + return isActiveList.isEmpty(); + } + + public boolean hasPresetRegions() { + return presetRegions; + } + /** * Band pass this ActivityProfile, producing a new profile that's band pass filtered * @return a new ActivityProfile that's the band-pass filtered version of this profile @@ -104,14 +135,21 @@ public class ActivityProfile { } iii++; } - final double[] filteredProbArray = new double[activeProbArray.length]; + + final double[] filteredProbArray; if( !presetRegions ) { + // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray + filteredProbArray = new double[activeProbArray.length]; for( iii = 0; iii < activeProbArray.length; iii++ ) { final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); } + } else { + // otherwise we simply use the activeProbArray directly + filteredProbArray = activeProbArray; } + iii = 0; for( final double prob : filteredProbArray ) { final ActivityProfileResult result = isActiveList.get(iii++); @@ -119,6 +157,7 @@ public class ActivityProfile { result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; result.resultValue = null; } + return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); } @@ -166,6 +205,7 @@ public class ActivityProfile { private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); } + private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { if( !isActive || curEnd - curStart < maxRegionSize ) { final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java index 8dc29aa3c..273c2e785 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java @@ -1,12 +1,16 @@ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.GenomeLoc; + /** * Created with IntelliJ IDEA. * User: rpoplin * Date: 7/27/12 */ - public class ActivityProfileResult { + private GenomeLoc loc; public double isActiveProb; public ActivityProfileResultState resultState; public Number resultValue; @@ -16,16 +20,52 @@ public class ActivityProfileResult { HIGH_QUALITY_SOFT_CLIPS } - public ActivityProfileResult( final double isActiveProb ) { - this.isActiveProb = isActiveProb; - this.resultState = ActivityProfileResultState.NONE; - this.resultValue = null; + /** + * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb + * + * @param loc the position of the result profile (for debugging purposes) + * @param isActiveProb the probability of being active (between 0 and 1) + */ + @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) + public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) { + this(loc, isActiveProb, ActivityProfileResultState.NONE, null); } - public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + /** + * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some + * information about the result state and value (TODO RYAN -- what do these mean?) + * + * @param loc the position of the result profile (for debugging purposes) + * @param isActiveProb the probability of being active (between 0 and 1) + */ + @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) + public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + // make sure the location of that activity profile is 1 + if ( loc.size() != 1 ) + throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc); + + this.loc = loc; this.isActiveProb = isActiveProb; this.resultState = resultState; this.resultValue = resultValue; } + /** + * Get the genome loc associated with the ActivityProfileResult + * @return the location of this result + */ + @Ensures("result != null") + public GenomeLoc getLoc() { + return loc; + } + + @Override + public String toString() { + return "ActivityProfileResult{" + + "loc=" + loc + + ", isActiveProb=" + isActiveProb + + ", resultState=" + resultState + + ", resultValue=" + resultValue + + '}'; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index f7c564c74..57dd19888 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -123,7 +123,7 @@ public class ActivityProfileUnitTest extends BaseTest { for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(loc, new ActivityProfileResult(p)); + profile.add(new ActivityProfileResult(loc, p)); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));