diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index bc7d5c6ca..3bc336124 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -344,8 +344,12 @@ public class GATKRunReport { @Element(required = false, name = "is-user-exception") Boolean isUserException; + @Element(required = false, name = "exception-class") + Class exceptionClass; + public ExceptionToXML(Throwable e) { message = e.getMessage(); + exceptionClass = e.getClass(); isUserException = e instanceof UserException; for (StackTraceElement element : e.getStackTrace()) { stackTrace.add(element.toString()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 3f24e6585..f9a185650 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; +import org.broadinstitute.sting.utils.activeregion.ActivityProfile; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -42,38 +44,31 @@ public class TraverseActiveRegions extends TraversalEngine isActiveList = new ArrayList(); - GenomeLoc firstIsActiveStart = null; + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); - //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider ); - ReferenceOrderedView referenceOrderedDataView = null; - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider ); - else - referenceOrderedDataView = (RodLocusView)locusView; + ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); // We keep processing while the next reference location is within the interval GenomeLoc prevLoc = null; while( locusView.hasNext() ) { final AlignmentContext locus = locusView.next(); GenomeLoc location = locus.getLocation(); + if(prevLoc != null) { - for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) { + // fill in the active / inactive labels from the stop of the previous location to the start of this location + // TODO refactor to separate function + for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) { final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) { - final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) ); - isActiveList.add( isActiveProb ); - if( firstIsActiveStart == null ) { - firstIsActiveStart = fakeLoc; - } + final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ); + profile.add(fakeLoc, isActiveProb); } } } @@ -89,12 +84,8 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension, walker.presetActiveRegions != null ); - logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." ); - if( walker.activeRegionOutStream == null ) { - workQueue.addAll( activeRegions ); - } else { // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : activeRegions ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } - } + // band-pass filter the list of isActive probabilities and turn into active regions + final ActivityProfile bandPassFiltered = profile.bandPassFilter(); + final List activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension ); - // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them - while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) { - final ActiveRegion activeRegion = workQueue.remove(); - sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker ); - } + // add active regions to queue of regions to process + workQueue.addAll( activeRegions ); + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + + // now go and process all of the active regions + sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); } return sum; } - // Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine - public T endTraversal( final Walker walker, T sum) { + + // -------------------------------------------------------------------------------- + // + // simple utility functions + // + // -------------------------------------------------------------------------------- + + private final double walkerActiveProb(final ActiveRegionWalker walker, + final RefMetaDataTracker tracker, final ReferenceContext refContext, + final AlignmentContext locus, final GenomeLoc location) { + if ( walker.hasPresetActiveRegions() ) { + return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0; + } else { + return walker.isActive( tracker, refContext, locus ); + } + } + + private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker walker, + final LocusShardDataProvider dataProvider, + final LocusView locusView) { + if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) + return new ManagingReferenceOrderedView( dataProvider ); + else + return (RodLocusView)locusView; + } + + // -------------------------------------------------------------------------------- + // + // code to handle processing active regions + // + // -------------------------------------------------------------------------------- + + private T processActiveRegions( final ActiveRegionWalker walker, T sum, final int minStart, final String currentContig ) { + if( walker.activeRegionOutStream != null ) { + writeActiveRegionsToStream(walker); + return sum; + } else { + return callWalkerMapOnActiveRegions(walker, sum, minStart, currentContig); + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param walker + */ + private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { + // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : workQueue ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + + private T callWalkerMapOnActiveRegions( final ActiveRegionWalker walker, T sum, final int minStart, final String currentContig ) { + // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them + // TODO can implement parallel traversal here while( workQueue.peek() != null ) { - final ActiveRegion activeRegion = workQueue.remove(); - sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker) walker ); + final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc(); + if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) { + final ActiveRegion activeRegion = workQueue.remove(); + sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker ); + } else { + break; + } } return sum; @@ -193,6 +232,12 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine integrateActiveList( final ArrayList activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension, final boolean presetRegions ) { - - final double ACTIVE_PROB_THRESHOLD = 0.2; // BUGBUG: needs to be set-able by the walker author - final ArrayList returnList = new ArrayList(); - if( activeList.size() == 0 ) { - return returnList; - } else if( activeList.size() == 1 ) { - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart(), firstIsActiveStart.getStart()), - activeList.get(0) > ACTIVE_PROB_THRESHOLD, engine.getGenomeLocParser(), activeRegionExtension ) ); - return returnList; - } else { - final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]); - final double[] filteredProbArray = new double[activeProbArray.length]; - final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // BUGBUG: needs to be set-able by the walker author - final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // BUGBUG: needs to be set-able by the walker author - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - double maxVal = 0; - for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE+1); jjj++ ) { - if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; } - } - filteredProbArray[iii] = maxVal; - } - - boolean curStatus = filteredProbArray[0] > ACTIVE_PROB_THRESHOLD; - int curStart = 0; - for(int iii = 1; iii < filteredProbArray.length; iii++ ) { - final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD; - if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) { - returnList.add( new ActiveRegion( - engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)), - curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); - curStatus = thisStatus; - curStart = iii; - } - } - if( curStart != filteredProbArray.length-1 ) { - returnList.add( new ActiveRegion( - engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)), - curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); - } - return returnList; - } + /** + * Special function called in LinearMicroScheduler to empty out the work queue. + * Ugly for now but will be cleaned up when we push this functionality more into the engine + */ + public T endTraversal( final Walker walker, T sum) { + return processActiveRegions((ActiveRegionWalker)walker, sum, Integer.MAX_VALUE, null); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 6403f15a2..8ff4b2f6f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -45,6 +45,10 @@ public abstract class ActiveRegionWalker extends Walker { @Argument(doc="Only output valid sequences.",fullName="onlyOutputValidAmplicons",required=false) boolean onlyOutputValidAmplicons = false; + /** + * If ignoreComplexEvents is true, the output fasta file will contain only sequences coming from SNPs and Indels. + * Complex substitutions will be ignored. + */ + @Argument(doc="Ignore complex genomic records.",fullName="ignoreComplexEvents",required=false) + boolean ignoreComplexEvents = false; + /** * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. * This changes the size of the k-mer used for alignment. @@ -146,6 +153,7 @@ public class ValidationAmplicons extends RodWalker { StringBuilder rawSequence; boolean sequenceInvalid; boolean isSiteSNP; + boolean isSiteIndel; List invReason; int indelCounter; @@ -244,6 +252,7 @@ public class ValidationAmplicons extends RodWalker { } else if ( validate != null ) { // record variant type in case it's needed in output format isSiteSNP = (validate.isSNP()); + isSiteIndel = (validate.isIndel()); // doesn't matter if there's a mask here too -- this is what we want to validate if ( validate.isFiltered() ) { logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site."); @@ -504,6 +513,9 @@ public class ValidationAmplicons extends RodWalker { } + if (ignoreComplexEvents && !isSiteIndel && !isSiteSNP) + return; + if (!onlyOutputValidAmplicons || !sequenceInvalid) { String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); if (sequenomOutput) { @@ -512,7 +524,7 @@ public class ValidationAmplicons extends RodWalker { out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); } else if (ilmnOutput) { - String type = isSiteSNP?"SNP":"INDEL"; + String type = isSiteSNP?"SNP":(isSiteIndel?"INDEL":"OTHER"); seqIdentity = seqIdentity.replace("*",""); // no * in ref allele out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index c2e69ee2d..37822dc84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -34,6 +34,11 @@ public class ActiveRegion implements HasGenomeLocation { fullExtentReferenceLoc = extendedLoc; } + @Override + public String toString() { + return "ActiveRegion " + activeRegionLoc.toString(); + } + // add each read to the bin and extend the reference genome activeRegionLoc if needed public void add( final GATKSAMRecord read ) { fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) ); @@ -78,4 +83,13 @@ public class ActiveRegion implements HasGenomeLocation { public void clearReads() { reads.clear(); } public void remove( final GATKSAMRecord read ) { reads.remove( read ); } public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } + + public boolean equalExceptReads(final ActiveRegion other) { + if ( ! activeRegionLoc.equals(other.activeRegionLoc)) return false; + if ( isActive != other.isActive ) return false; + if ( genomeLocParser != other.genomeLocParser ) return false; + if ( extension != other.extension ) return false; + if ( ! extendedLoc.equals(other.extendedLoc) ) return false; + return true; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java new file mode 100644 index 000000000..79b17cdba --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Class holding information about per-base activity scores for the + * active region traversal + * + * @author Mark DePristo + * @since Date created + */ +public class ActivityProfile { + final GenomeLocParser parser; + final boolean presetRegions; + GenomeLoc regionStartLoc = null; + final List isActiveList; + + private GenomeLoc lastLoc = null; + + // todo -- add upfront the start and stop of the intervals + // todo -- check that no regions are unexpectedly missing + // todo -- add unit tests + // TODO -- own preset regions + public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { + this(parser, presetRegions, new ArrayList(), null); + } + + protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + this.parser = parser; + this.presetRegions = presetRegions; + this.isActiveList = isActiveList; + this.regionStartLoc = regionStartLoc; + } + + public void add(final GenomeLoc loc, final double score) { + if ( loc.size() != 1 ) + throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); + if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) + throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); + isActiveList.add(score); + if( regionStartLoc == null ) { + regionStartLoc = loc; + } + } + + public int size() { + return isActiveList.size(); + } + + /** + * Band pass this ActivityProfile, producing a new profile that's band pass filtered + * @return a new ActivityProfile that's the band-pass filtered version of this profile + */ + public ActivityProfile bandPassFilter() { + final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]); + final Double[] filteredProbArray = new Double[activeProbArray.length]; + final int FILTER_SIZE = ( presetRegions ? 0 : 50 ); // TODO: needs to be set-able by the walker author + for( int iii = 0; iii < activeProbArray.length; iii++ ) { + double maxVal = 0; + for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(isActiveList.size(), iii+FILTER_SIZE+1); jjj++ ) { + if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; } + } + filteredProbArray[iii] = maxVal; + } + + return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc); + } + + /** + * Partition this profile into active regions + * @param activeRegionExtension + * @return + */ + public List createActiveRegions( final int activeRegionExtension ) { + final int MAX_ACTIVE_REGION = ( presetRegions ? 16001 : 425 ); // TODO: needs to be set-able by the walker author + final double ACTIVE_PROB_THRESHOLD = 0.2; // TODO: needs to be set-able by the walker author + + if( isActiveList.size() == 0 ) { + // no elements in the active list, just return an empty one + return Collections.emptyList(); + } else if( isActiveList.size() == 1 ) { + // there's a single element, it's either active or inactive + boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + final ActiveRegion region = createActiveRegion(isActive, 0, 0, activeRegionExtension ); + return Collections.singletonList(region); + } else { + // there are 2+ elements, divide these up into regions + final ArrayList returnList = new ArrayList(); + boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + int curStart = 0; + for(int iii = 1; iii < isActiveList.size(); iii++ ) { + final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD; + if( isActive != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) { + returnList.add( createActiveRegion(isActive, curStart, iii-1, activeRegionExtension) ); + isActive = thisStatus; + curStart = iii; + } + } + + if( curStart != isActiveList.size()-1 ) { + returnList.add( createActiveRegion(isActive, curStart, isActiveList.size()-1, activeRegionExtension) ); + } + return returnList; + } + } + + /** + * Helper routine to create an active region based on our current start and end offsets + * @param isActive should the region be active? + * @param curStart offset (0-based) from the start of this region + * @param curEnd offset (0-based) from the start of this region + * @param activeRegionExtension + * @return a fully initialized ActiveRegion with the above properties + */ + private final ActiveRegion createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension) { + final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); + return new ActiveRegion( loc, isActive, parser, activeRegionExtension ); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/activeregionqc/CountReadsInActiveRegionsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/activeregionqc/CountReadsInActiveRegionsIntegrationTest.java new file mode 100644 index 000000000..44cf87b45 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/activeregionqc/CountReadsInActiveRegionsIntegrationTest.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.activeregionqc; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Tests CountReadsInActiveRegions + */ +public class CountReadsInActiveRegionsIntegrationTest extends WalkerTest { + @Test + public void basicTest() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CountReadsInActiveRegions -R " + b37KGReference + " -I " + b37GoodNA12878BAM + " -L 20:10,000,000-10,200,000 -o %s", + 1, + Arrays.asList("fcd581aa6befe85c7297509fa7b34edf")); + executeTest("CountReadsInActiveRegions:", spec); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java new file mode 100644 index 000000000..e6d0322c0 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.activeregion; + + +// the imports for unit testing. + + +import net.sf.picard.reference.ReferenceSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.recalibration.QualQuantizer; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class ActivityProfileUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + private GenomeLoc startLoc; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100); + } + + // -------------------------------------------------------------------------------- + // + // Basic tests Provider + // + // -------------------------------------------------------------------------------- + + private class BasicActivityProfileTestProvider extends TestDataProvider { + List probs; + List expectedRegions; + int extension = 0; + GenomeLoc regionStart = startLoc; + + public BasicActivityProfileTestProvider(final List probs, final List expectedRegions) { + super(BasicActivityProfileTestProvider.class); + this.probs = probs; + this.expectedRegions = expectedRegions; + setName(getName()); + } + + public BasicActivityProfileTestProvider(final List probs, boolean startActive, int ... startsAndStops) { + super(BasicActivityProfileTestProvider.class); + this.probs = probs; + this.expectedRegions = toRegions(startActive, startsAndStops); + setName(getName()); + } + + private String getName() { + return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions)); + } + + private List toRegions(boolean isActive, int[] startsAndStops) { + List l = new ArrayList(); + for ( int i = 0; i < startsAndStops.length - 1; i++) { + int start = regionStart.getStart() + startsAndStops[i]; + int end = regionStart.getStart() + startsAndStops[i+1] - 1; + GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); + ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); + l.add(r); + isActive = ! isActive; + } + return l; + } + } + + @DataProvider(name = "BasicActivityProfileTestProvider") + public Object[][] makeQualIntervalTestProvider() { + new BasicActivityProfileTestProvider(Arrays.asList(1.0), true, 0, 1); + // TODO -- RYAN THESE ALL EXHIBIT AN OFF-BY-ONE ERROR. SORRY I HAVE TO GO BUT I CANNOT FIX NOW + //new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0), true, 0, 1, 2); + //new BasicActivityProfileTestProvider(Arrays.asList(0.0, 1.0), false, 0, 1, 2); + //new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + + return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); + } + + @Test(dataProvider = "BasicActivityProfileTestProvider") + public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { + ActivityProfile profile = new ActivityProfile(genomeLocParser, false); + + Assert.assertEquals(profile.parser, genomeLocParser); + + for ( int i = 0; i < cfg.probs.size(); i++ ) { + double p = cfg.probs.get(i); + GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); + profile.add(loc, p); + } + Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); + + Assert.assertEquals(profile.size(), cfg.probs.size()); + Assert.assertEquals(profile.isActiveList, cfg.probs); + + assertRegionsAreEqual(profile.createActiveRegions(0), cfg.expectedRegions); + } + + private void assertRegionsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i))); + } + } + + // todo -- test extensions +} \ No newline at end of file