From 612c3fdd9d1c35d11505569a6327f647a907d59f Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 17 Jun 2010 20:17:31 +0000 Subject: [PATCH] First pass at eliminating the old sharding system. Classes required for the original sharding system are gone where I could identify them, but hierarchies that split to support two sharding systems have not yet been taken apart. @Eric: ~4k lines. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3580 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 24 +- .../arguments/GATKArgumentCollection.java | 4 - .../shards/IndexDelimitedLocusShard.java | 14 +- .../IndexDelimitedLocusShardStrategy.java | 2 +- .../datasources/shards/IntervalShard.java | 77 --- .../shards/IntervalShardStrategy.java | 115 ----- .../shards/LinearLocusShardStrategy.java | 99 ---- .../shards/LocusShardStrategy.java | 275 ----------- .../sting/gatk/datasources/shards/Shard.java | 2 +- .../shards/ShardStrategyFactory.java | 15 - .../IndexDrivenSAMDataSource.java | 442 ------------------ .../simpleDataSources/ReadStreamPointer.java | 305 ------------ .../simpleDataSources/ReadStreamResource.java | 196 -------- .../simpleDataSources/SAMDataSource.java | 12 - .../simpleDataSources/SAMResourcePool.java | 193 -------- .../gatk/executive/LinearMicroScheduler.java | 2 +- .../gatk/iterators/BoundedReadIterator.java | 5 +- .../iterators/IntervalOverlapIterator.java | 113 ----- .../iterators/MergingSamRecordIterator2.java | 331 ------------- .../gatk/iterators/PlusOneFixIterator.java | 106 ----- .../sting/gatk/iterators/QueryIterator.java | 48 -- .../utils/sam/ArtificialSAMQueryIterator.java | 3 +- .../sting/utils/sam/ArtificialSAMUtils.java | 8 +- .../shards/IntervalShardStrategyUnitTest.java | 149 ------ .../shards/IntervalShardUnitTest.java | 76 --- .../LinearLocusShardStrategyUnitTest.java | 122 ----- .../shards/ShardStrategyFactoryUnitTest.java | 78 ---- .../ArtificialResourcePool.java | 102 ---- .../SAMBAMDataSourceUnitTest.java | 14 +- .../SAMByIntervalUnitTest.java | 151 ------ .../simpleDataSources/SAMByReadsUnitTest.java | 175 ------- .../BoundedReadIteratorUnitTest.java | 8 - .../IntervalOverlapIteratorUnitTest.java | 80 ---- .../iterators/PlusOneFixIteratorUnitTest.java | 120 ----- .../traversals/TraverseReadsUnitTest.java | 55 +-- 35 files changed, 28 insertions(+), 3493 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShard.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategy.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategy.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryUnitTest.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByIntervalUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsUnitTest.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIteratorUnitTest.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorUnitTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index a13f6bc9f..ef9ff8390 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -726,7 +726,7 @@ public class GenomeAnalysisEngine { ValidationExclusion exclusions) { // Use monolithic sharding if no index is present. Monolithic sharding is always required for the original // sharding system; it's required with the new sharding system only for locus walkers. - if(readsDataSource != null && !readsDataSource.hasIndex() && (argCollection.disableExperimentalSharding || walker instanceof LocusWalker)) { + if(readsDataSource != null && !readsDataSource.hasIndex() && walker instanceof LocusWalker) { if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM) || intervals != null) throw new StingException("The GATK cannot currently process unindexed BAM files"); @@ -756,27 +756,21 @@ public class GenomeAnalysisEngine { if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) Utils.scareUser("Locus walkers can only walk over coordinate-sorted data. Please resort your input BAM file."); - shardType = (walker.isReduceByInterval()) ? - ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL : - ShardStrategyFactory.SHATTER_STRATEGY.LINEAR; shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), - !argCollection.disableExperimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : shardType, + ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, intervals, maxIterations); } else shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), - !argCollection.disableExperimentalSharding ? ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL : ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, + ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, maxIterations); } else if (walker instanceof ReadWalker || walker instanceof DuplicateWalker) { - if(!argCollection.disableExperimentalSharding) - shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL; - else - shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS; + shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL; if (intervals != null && !intervals.isEmpty()) { shardStrategy = ShardStrategyFactory.shatter(readsDataSource, @@ -793,8 +787,6 @@ public class GenomeAnalysisEngine { SHARD_SIZE, maxIterations); } } else if (walker instanceof ReadPairWalker) { - if(argCollection.disableExperimentalSharding) - Utils.scareUser("Pairs traversal cannot be used in conjunction with the old sharding system."); if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname) Utils.scareUser("Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file."); if(intervals != null && !intervals.isEmpty()) @@ -822,13 +814,7 @@ public class GenomeAnalysisEngine { if (reads.getReadsFiles().size() == 0) return null; - SAMDataSource dataSource = null; - if(!argCollection.disableExperimentalSharding) - dataSource = new BlockDrivenSAMDataSource(reads); - else - dataSource = new IndexDrivenSAMDataSource(reads); - - return dataSource; + return new BlockDrivenSAMDataSource(reads); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 3d737129c..7c94ec272 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -168,10 +168,6 @@ public class GATKArgumentCollection { // @Argument(fullName = "enableRodWalkers", shortName = "erw", doc = "Enable experimental rodWalker support. TEMPORARY HACK TO ALLOW EXPERIMENTATION WITH ROD WALKERS. [default is false]}.", required = false) // public boolean enableRodWalkers = false; - @Element(required = false) - @Argument(fullName = "disable_experimental_sharding",shortName="ds", doc="Disable the experimental sharding strategy.", required = false) - public boolean disableExperimentalSharding = false; - @ElementList(required = false) @Argument(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching : or a .txt file containing the filter strings one per line.", required = false) public List readGroupBlackList = null; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShard.java index 8719b8e6c..171038eb8 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShard.java @@ -46,24 +46,14 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa */ private final Map fileSpans; - /** - * An IndexDelimitedLocusShard can be used either for LOCUS or LOCUS_INTERVAL shard types. - * Track which type is being used. - */ - private final ShardType shardType; - /** * Create a new locus shard, divided by index. * @param intervals List of intervals to process. * @param fileSpans File spans associated with that interval. - * @param shardType Type of the shard; must be either LOCUS or LOCUS_INTERVAL. */ - IndexDelimitedLocusShard(List intervals, Map fileSpans, ShardType shardType) { + IndexDelimitedLocusShard(List intervals, Map fileSpans) { super(intervals); this.fileSpans = fileSpans; - if(shardType != ShardType.LOCUS && shardType != ShardType.LOCUS_INTERVAL) - throw new StingException("Attempted to create an IndexDelimitedLocusShard with invalid shard type: " + shardType); - this.shardType = shardType; } /** @@ -125,6 +115,6 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa */ @Override public ShardType getShardType() { - return shardType; + return ShardType.LOCUS; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java index 2504024d5..035c34ac9 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java @@ -127,7 +127,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy { public IndexDelimitedLocusShard next() { FilePointer nextFilePointer = filePointerIterator.next(); Map fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null; - return new IndexDelimitedLocusShard(nextFilePointer.locations,fileSpansBounding,Shard.ShardType.LOCUS_INTERVAL); + return new IndexDelimitedLocusShard(nextFilePointer.locations,fileSpansBounding); } /** we don't support the remove command */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShard.java deleted file mode 100755 index fd87f53ff..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShard.java +++ /dev/null @@ -1,77 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.Collections; -import java.util.List; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - *

- * Class IntervalShard - *

- * the base interval shard. All interval shards are generally the same, - * but must return their ShardType individually. - */ -public class IntervalShard implements Shard { - - /** a collection of genomic locations to interate over */ - private GenomeLoc mSet; - private Shard.ShardType mType = Shard.ShardType.LOCUS_INTERVAL; - - IntervalShard(GenomeLoc myLocation, Shard.ShardType intervalType) { - if (intervalType != Shard.ShardType.LOCUS_INTERVAL && intervalType != Shard.ShardType.READ_INTERVAL) - throw new IllegalArgumentException("The specified interval type must be either LOCUS_INTERVAL or READ_INTERVAL"); - mType = intervalType; - mSet = myLocation.clone(); - } - - /** @return the genome location represented by this shard */ - public List getGenomeLocs() { - return Collections.singletonList(mSet); - } - - /** - * returns the type of shard, READ - * - * @return READ, indicating the shard type - */ - public Shard.ShardType getShardType() { - return mType; - } - - /** - * String representation of this shard. - * @return A string representation of the boundaries of this shard. - */ - @Override - public String toString() { - return mSet.toString(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategy.java deleted file mode 100755 index 8eb2c750d..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategy.java +++ /dev/null @@ -1,115 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.StingException; - -import java.util.Iterator; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * @author aaron - *

- * Class ReadByIntervalShardStrategy - *

- * Impliments the sharding strategy for reads, given a list - * of genomic locations. Shards returned will be bounded by the interval, - * but each provided interval may be split into a number of smaller regions. - */ -public class IntervalShardStrategy implements ShardStrategy { - - /** our storage of the genomic locations they'd like to shard over */ - private final GenomeLocSortedSet regions; - - /** their prefered size of the shard, we can modify this based on what we see in the shards */ - private long size; - private Shard.ShardType type; - /** - * change the recommended shard size for the next shard we generate. The code will do it's - * best to respect this value, but there are no guarantees. - * - * @param size the next recommended shard size. - */ - public void adjustNextShardSize( long size ) { - this.size = size; // we don't do anything with the size in this case, but we should store it anyway - } - - /** - * construct the shard strategy from a seq dictionary, a shard size, and and genomeLocs - * - * @param size - * @param locations - */ - IntervalShardStrategy( long size, GenomeLocSortedSet locations, Shard.ShardType shardType ) { - type = shardType; - this.regions = locations == null ? new GenomeLocSortedSet() : locations.clone(); - this.size = size; - } - - /** - * returns true if there are additional shards - * - * @return false if we're done processing shards - */ - public boolean hasNext() { - return ( !regions.isEmpty() ); - } - - /** - * gets the next Shard - * - * @return the next shard - */ - public Shard next() { - if (( this.regions == null ) || ( regions.isEmpty() )) { - throw new StingException("IntervalShardStrategy: genomic regions list is empty in next() function."); - } - - // get the first region in the list - GenomeLoc loc = regions.iterator().next(); - - regions.remove(loc); - return new IntervalShard(loc,type); - - } - - /** we don't support the remove command */ - public void remove() { - throw new UnsupportedOperationException("ShardStrategies don't support remove()"); - } - - /** - * makes the ReadIntervalShard iterable, i.e. usable in a for loop. - * - * @return - */ - public Iterator iterator() { - return this; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategy.java deleted file mode 100755 index 3e858ead4..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategy.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * @author aaron - * @version 1.0 - * @date Apr 6, 2009 - *

- * Class AdaptiveShard - *

- * allows you to change the sharding length as you traverse - */ -class LinearLocusShardStrategy extends LocusShardStrategy { - - // default the next size to 100,000 - private long nextShardSize = 100000; - - /** - * the constructor, taking a seq dictionary to parse out contigs - * - * @param dic the seq dictionary - */ - LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) { - super(dic); - this.limitingFactor = limitByCount; - this.nextShardSize = startSize; - } - - /** - * the constructor, constructing from another Locus strategy - * - * @param strat the shatter to convert from - */ - LinearLocusShardStrategy(LocusShardStrategy strat) { - super(strat); - this.nextShardSize = strat.nextShardSize(); - } - - - /** - * The constructor, for a genomic list, start size, and a reference dictionary - * @param dic the reference dictionary - * @param startSize the starting size of the shard - * @param lst locations to iterate from - */ - LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) { - super(dic, lst); - this.limitingFactor = limitByCount; - this.nextShardSize = startSize; - } - - - /** - * set the next shards size - * - * @param size adjust the next size to this - */ - public void adjustNextShardSize(long size) { - nextShardSize = size; - } - - /** - * This is how the various shards strategies implements their approach - * - * @return the next shard size - */ - protected long nextShardSize() { - return nextShardSize; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java deleted file mode 100755 index 68245cc1d..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ /dev/null @@ -1,275 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import net.sf.samtools.SAMSequenceDictionary; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.Iterator; -import java.util.Collections; -/** - * - * User: aaron - * Date: Apr 6, 2009 - * Time: 11:23:17 AM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - -/** - * @author aaron - * @version 1.0 - */ -public abstract class LocusShardStrategy implements ShardStrategy { - - // this stores the seq dictionary, which is a reference for the - // lengths and names of contigs, which you need to generate an iterative stratagy - protected final SAMSequenceDictionary dic; - - // the current genome location - protected GenomeLoc mLoc = null; - - // current seq location - protected int seqLoc = 0; - - // the actual last size; this can change based on contig endings - protected long lastGenomeLocSize = 0; - - // do we have another contig? - private boolean nextContig = false; - - /** our interal list * */ - private GenomeLocSortedSet intervals = null; - - /** our log, which we want to capture anything from this class */ - private static Logger logger = Logger.getLogger(LocusShardStrategy.class); - - /** the number of iterations before we stop */ - protected long limitingFactor = -1; - private boolean stopDueToLimitingFactor = false; - - /** - * the constructor, taking a seq dictionary to parse out contigs - * - * @param dic the seq dictionary - */ - LocusShardStrategy(SAMSequenceDictionary dic) { - this.dic = dic; - limitingFactor = -1; - mLoc = GenomeLocParser.createGenomeLoc(0, 0, 0); - if (dic.getSequences().size() > 0) { - nextContig = true; - } - } - - /** - * the copy constructor, - * - * @param old the old strategy - */ - LocusShardStrategy(LocusShardStrategy old) { - this.dic = old.dic; - this.mLoc = old.mLoc; - this.seqLoc = old.seqLoc; - this.lastGenomeLocSize = old.lastGenomeLocSize; - this.nextContig = old.nextContig; - this.limitingFactor = old.limitingFactor; - } - - - /** - * the constructor, taking a seq dictionary to parse out contigs - * - * @param dic the seq dictionary - * @param intervals file - */ - LocusShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) { - this.dic = dic; - this.intervals = intervals.clone(); - // set the starting point to the beginning interval - if (intervals.size() < 1) { - throw new IllegalArgumentException("Interval files must contain at least one interval"); - } - GenomeLoc loc = intervals.iterator().next(); - mLoc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1); - if (dic.getSequences().size() > 0) { - nextContig = true; - } - } - - /** - * - * Abstract methods that each strategy has to implement - * - */ - - /** - * This is how the various shards strategies implements their approach, adjusting this value - * - * @return the next shard size - */ - protected abstract long nextShardSize(); - - - /** - * - * Concrete methods that each strategy does not have to implement - * - */ - - - /** - * get the next shard, based on the return size of nextShardSize - * - * @return the next shard - */ - public Shard next() { - - // lets get some background info on the problem - long length = dic.getSequence(seqLoc).getSequenceLength(); - long proposedSize = nextShardSize(); - long nextStart = mLoc.getStop() + 1; - - if (this.limitingFactor > 0) { - if (proposedSize < limitingFactor) { - limitingFactor = limitingFactor - proposedSize; - } else { - proposedSize = limitingFactor; - this.stopDueToLimitingFactor = true; - } - } - - // if we don't have an interval set, use the non interval based approach. Simple, eh? - if (this.intervals == null) { - return nonIntervaledNext(length, proposedSize, nextStart); - } else { - return intervaledNext(); - } - - } - - /** - * Interval based next processing - * - * - * @return the shard that represents this data - */ - private Shard intervaledNext() { - if ((this.intervals == null) || (intervals.isEmpty())) { - throw new StingException("LocusShardStrategy: genomic regions list is empty in next() function."); - } - - // get the first region in the list - GenomeLoc loc = intervals.iterator().next(); - - intervals.remove(loc); - return new IntervalShard(loc, Shard.ShardType.LOCUS_INTERVAL); - - } - - /** - * Get the next shard, if we don't have intervals to traverse over - * - * @param length the length of the contig - * @param proposedSize the proposed size - * @param nextStart the next start location - * - * @return the shard to return to the user - */ - private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) { - // can we fit it into the current seq size? - if (nextStart + proposedSize - 1 < length) { - lastGenomeLocSize = proposedSize; - mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1); - return new LocusShard(Collections.singletonList(mLoc)); - } - // else we can't make it in the current location, we have to stitch one together - else { - // lets find out the remaining size of the current contig - long overflow = nextStart + proposedSize - 1 - length; - logger.debug("Overflow = " + overflow + " length: " + length); - - // set our last size counter to the remaining size - lastGenomeLocSize = proposedSize - overflow; - - // move to the next contig - // the next sequence should start at the begining of the next contig - Shard ret = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1))); - - // now jump ahead to the next contig - jumpContig(); - - // return the shard - return ret; - } - } - - /** jump to the next contig */ - private void jumpContig() { - ++seqLoc; - - if (!(seqLoc < dic.getSequences().size())) { - nextContig = false; - return; - } - logger.debug("Next contig, index = " + dic.getSequence(seqLoc).getSequenceIndex()); - mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0); - - - } - - /** - * is there another GenomeLoc to get? - * - * @return - */ - public boolean hasNext() { - if (this.stopDueToLimitingFactor) { - return false; - } - // if we don't have an interval file, use the non interval based approach. - if (this.intervals == null) { - return nextContig; - } else { - return (this.intervals.size() > 0); - } - } - - /** we don't support remove */ - public void remove() { - throw new UnsupportedOperationException("Can not remove records from a shard iterator!"); - } - - - /** - * to be for-each(able), we must implement this method - * - * @return - */ - public Iterator iterator() { - return this; - } - - /** - * this allows a shard strategy to get the current interval. It's kind of a hack, but for the - * locusWindowShardStrategy it was the best approach. - * - * @return - */ - protected GenomeLoc getCurrentInterval() { - if (this.intervals == null || intervals.size() < 1) { - return null; - } - return intervals.iterator().next(); - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java index a8fba0cf3..b62b33ca4 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java @@ -31,7 +31,7 @@ import java.util.List; */ public interface Shard extends Serializable { enum ShardType { - READ, LOCUS, READ_INTERVAL, LOCUS_INTERVAL + READ, LOCUS } /** @return the genome location represented by this shard */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java index 921abe570..8edafa1f2 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java @@ -38,9 +38,6 @@ import java.io.File; */ public class ShardStrategyFactory { public enum SHATTER_STRATEGY { - LINEAR, - READS, - INTERVAL, MONOLITHIC, // Put all of the available data into one shard. LOCUS_EXPERIMENTAL, READS_EXPERIMENTAL @@ -72,12 +69,6 @@ public class ShardStrategyFactory { */ static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) { switch (strat) { - case LINEAR: - return new LinearLocusShardStrategy(dic, startingSize, limitByCount); - case READS: - return new ReadDelimitedReadShardStrategy(startingSize, limitByCount); - case INTERVAL: - throw new StingException("Requested trategy: " + strat + " doesn't work with the limiting count (-M) command line option"); case LOCUS_EXPERIMENTAL: return new IndexDelimitedLocusShardStrategy(readsDataSource,referenceDataSource,null); case READS_EXPERIMENTAL: @@ -116,12 +107,6 @@ public class ShardStrategyFactory { */ static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) { switch (strat) { - case LINEAR: - return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount); - case INTERVAL: - return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.LOCUS_INTERVAL); - case READS: - return new IntervalShardStrategy(startingSize, lst, Shard.ShardType.READ_INTERVAL); case LOCUS_EXPERIMENTAL: return new IndexDelimitedLocusShardStrategy(readsDataSource,referenceDataSource,lst); case READS_EXPERIMENTAL: diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java deleted file mode 100644 index e34f64cdc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java +++ /dev/null @@ -1,442 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.util.CloseableIterator; - -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; -import org.broadinstitute.sting.gatk.datasources.shards.ReadDelimitedReadShard; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * User: aaron - * Date: Mar 26, 2009 - * Time: 2:36:16 PM - *

- * Converts shards to SAM iterators over the specified region - */ -public class IndexDrivenSAMDataSource extends SAMDataSource { - // used for the reads case, the last count of reads retrieved - long readsTaken = 0; - - // our last genome loc position - protected GenomeLoc lastReadPos = null; - - // do we take unmapped reads - private boolean includeUnmappedReads = true; - - // reads based traversal variables - private boolean intoUnmappedReads = false; - private int readsSeenAtLastPos = 0; - - /** - * A histogram of exactly what reads were removed from the input stream and why. - */ - private SAMReadViolationHistogram violations = new SAMReadViolationHistogram(); - - // A pool of SAM iterators. - private SAMResourcePool resourcePool = null; - - private GenomeLoc mLastInterval = null; - - /** - * Returns a histogram of reads that were screened out, grouped by the nature of the error. - * @return Histogram of reads. Will not be null. - */ - public SAMReadViolationHistogram getViolationHistogram() { - return violations; - } - - /** - * constructor, given sam files - * - * @param reads the list of sam files - */ - public IndexDrivenSAMDataSource( Reads reads ) throws SimpleDataSourceLoadException { - super(reads); - resourcePool = new SAMResourcePool(reads); - } - - /** - * Do all BAM files backing this data source have an index? The case where hasIndex() is false - * is supported, but only in a few extreme cases. - * @return True if an index is present; false otherwise. - */ - public boolean hasIndex() { - return resourcePool.hasIndex; - } - - /** - * Retrieves the sort order of the readers. - * @return Sort order. For this datasource, MUST be coordinate. - */ - public SAMFileHeader.SortOrder getSortOrder() { - return SAMFileHeader.SortOrder.coordinate; - } - - /** - * Gets the (potentially merged) SAM file header. - * - * @return SAM file header. - */ - public SAMFileHeader getHeader() { - return resourcePool.getHeader(); - } - - public SAMFileHeader getHeader(SAMReaderID id) { - return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader(); - } - - /** - * Retrieves the id of the reader which built the given read. - * @param read The read to test. - * @return ID of the reader. - */ - public SAMReaderID getReaderID(SAMRecord read) { - if(resourcePool.readerToIDMap.containsKey(read.getFileSource().getReader())) - return resourcePool.readerToIDMap.get(read.getFileSource().getReader()); - throw new StingException("Unable to find reader id for record."); - } - - /** - * Returns Reads data structure containing information about the reads data sources placed in this pool as well as - * information about how they are downsampled, sorted, and filtered - * @return - */ - public Reads getReadsInfo() { return reads; } - - /** Returns true if there are read group duplicates within the merged headers. */ - public boolean hasReadGroupCollisions() { - return resourcePool.getHeaderMerger().hasReadGroupCollisions(); - } - - /** Returns the read group id that should be used for the input read and RG id. */ - public String getReadGroupId(final SAMReaderID id, final String originalReadGroupId) { - SAMFileReader reader = resourcePool.getFileToReaderMapping().get(id.samFile); - return resourcePool.getHeaderMerger().getReadGroupId(reader,originalReadGroupId); - } - - /** - * - * @param shard the shard to get data for - * - * @return an iterator for that region - */ - public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException { - // setup the iterator pool if it's not setup - boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true; - resourcePool.setQueryOverlapping(queryOverlapping); - - StingSAMIterator iterator = null; - if (shard.getShardType() == Shard.ShardType.READ) { - iterator = seekRead(shard); - iterator = applyDecoratingIterators(true, - iterator, - reads.getDownsamplingMethod().toFraction, - reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), - reads.getSupplementalFilters()); - } else if (shard.getShardType() == Shard.ShardType.LOCUS) { - iterator = seekLocus(shard); - iterator = applyDecoratingIterators(false, - iterator, - reads.getDownsamplingMethod().toFraction, - reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), - reads.getSupplementalFilters()); - } else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) || - (shard.getShardType() == Shard.ShardType.READ_INTERVAL)) { - iterator = seekLocus(shard); - iterator = applyDecoratingIterators(false, - iterator, - reads.getDownsamplingMethod().toFraction, - reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), - reads.getSupplementalFilters()); - - // add the new overlapping detection iterator, if we have a last interval and we're a read based shard - if(shard.getGenomeLocs().size() > 1) - throw new StingException("This SAMDataSource does not support multiple intervals within a single shard"); - GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0); - if(shard.getShardType() == Shard.ShardType.READ_INTERVAL) { - if (mLastInterval != null) - iterator = new PlusOneFixIterator(shardGenomeLoc,new IntervalOverlapIterator(iterator,mLastInterval,false)); - else - iterator = new PlusOneFixIterator(shardGenomeLoc,iterator); - mLastInterval = shardGenomeLoc; - } - } else { - - throw new StingException("seek: Unknown shard type"); - } - - return iterator; - } - - - /** - *

- * seekLocus - *

- * - * @param shard the shard containing the genome location to extract data for - * - * @return an iterator for that region - */ - private StingSAMIterator seekLocus( Shard shard ) throws SimpleDataSourceLoadException { - if(shard instanceof MonolithicShard) - return createIterator(new EntireStream()); - - if( getHeader().getSequenceDictionary().getSequences().size() == 0 ) - throw new StingException("Unable to seek to the given locus; reads data source has no alignment information."); - - if(shard.getGenomeLocs().size() > 1) - throw new StingException("This SAMDataSource does not support multiple intervals within a single shard"); - GenomeLoc shardGenomeLoc = shard.getGenomeLocs().get(0); - - return createIterator( new MappedStreamSegment(shardGenomeLoc) ); - } - - /** - *

- * seek - *

- * - * @param shard the read shard to extract from - * - * @return an iterator for that region - */ - private StingSAMIterator seekRead( Shard shard ) throws SimpleDataSourceLoadException { - if(shard instanceof MonolithicShard) - return createIterator(new EntireStream()); - - ReadDelimitedReadShard readShard = (ReadDelimitedReadShard)shard; - StingSAMIterator iter = null; - - // If there are no entries in the sequence dictionary, there can't possibly be any unmapped reads. Force state to 'unmapped'. - if( isSequenceDictionaryEmpty() ) - intoUnmappedReads = true; - - if (!intoUnmappedReads) { - if (lastReadPos == null) { - lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); - iter = createIterator(new MappedStreamSegment(lastReadPos)); - return InitialReadIterator(readShard.getSize(), iter); - } else { - lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); - iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos)))); - } - - if (intoUnmappedReads && !includeUnmappedReads) - readShard.signalDone(); - } - - if (intoUnmappedReads && includeUnmappedReads) { - if (iter != null) - iter.close(); - iter = toUnmappedReads(readShard.getSize()); - if (!iter.hasNext()) - readShard.signalDone(); - } - - return iter; - } - - /** - * If we're in by-read mode, this indicates if we want - * to see unmapped reads too. Only seeing mapped reads - * is much faster, but most BAM files have significant - * unmapped read counts. - * - * @param seeUnMappedReads true to see unmapped reads, false otherwise - */ - public void viewUnmappedReads( boolean seeUnMappedReads ) { - includeUnmappedReads = seeUnMappedReads; - } - - /** - * For unit testing, add a custom iterator pool. - * - * @param resourcePool Custom mock iterator pool. - */ - void setResourcePool( SAMResourcePool resourcePool ) { - this.resourcePool = resourcePool; - } - - /** - * Retrieve unmapped reads. - * - * @param readCount how many reads to retrieve - * - * @return the bounded iterator that you can use to get the intervaled reads from - */ - StingSAMIterator toUnmappedReads( long readCount ) { - StingSAMIterator iter = createIterator(new UnmappedStreamSegment(readsTaken, readCount)); - readsTaken += readCount; - return iter; - } - - - /** - * A seek function for mapped reads. - * - * @param readCount how many reads to retrieve - * @param iter the iterator to use, seeked to the correct start location - * - * @return the bounded iterator that you can use to get the intervaled reads from. Will be a zero-length - * iterator if no reads are available. - * @throws SimpleDataSourceLoadException - */ - StingSAMIterator fastMappedReadSeek( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException { - BoundedReadIterator bound; - correctForReadPileupSeek(iter); - if (readsTaken == 0) { - return InitialReadIterator(readCount, iter); - } - int x = 0; - SAMRecord rec = null; - - // Assuming that lastReadPos should never be null, because this is a mappedReadSeek - // and initial queries are handled by the previous conditional. - int lastContig = lastReadPos.getContigIndex(); - int lastPos = (int)lastReadPos.getStart(); - - while (x < readsTaken) { - if (iter.hasNext()) { - rec = iter.next(); - if (lastContig == rec.getReferenceIndex() && lastPos == rec.getAlignmentStart()) ++this.readsSeenAtLastPos; - else this.readsSeenAtLastPos = 1; - lastPos = rec.getAlignmentStart(); - ++x; - } else { - iter.close(); - - // jump contigs - lastReadPos = GenomeLocParser.toNextContig(lastReadPos); - if (lastReadPos == null) { - // check to see if we're using unmapped reads, if not return, we're done - readsTaken = 0; - intoUnmappedReads = true; - - // fastMappedReadSeek must return an iterator, even if that iterator iterates through nothing. - return new NullSAMIterator(reads); - } else { - readsTaken = readCount; - readsSeenAtLastPos = 0; - lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); - CloseableIterator ret = createIterator(new MappedStreamSegment(lastReadPos)); - return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount); - } - } - } - - // if we're off the end of the last contig (into unmapped territory) - if (rec != null && rec.getAlignmentStart() == 0) { - readsTaken += readCount; - intoUnmappedReads = true; - } - // else we're not off the end, store our location - else if (rec != null) { - int stopPos = rec.getAlignmentStart(); - if (stopPos < lastReadPos.getStart()) { - lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos); - } else { - lastReadPos = GenomeLocParser.setStart(lastReadPos,rec.getAlignmentStart()); - } - } - // in case we're run out of reads, get out - else { - throw new StingException("Danger: weve run out reads in fastMappedReadSeek"); - //return null; - } - bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount); - - - // return the iterator - return bound; - } - - /** - * Determines whether the BAM file is completely unsequenced. Requires that the resource pool be initialized. - * @return True if the sequence dictionary is completely empty. False otherwise. - */ - private boolean isSequenceDictionaryEmpty() { - return getHeader().getSequenceDictionary().isEmpty(); - } - - /** - * Even though the iterator has seeked to the correct location, there may be multiple reads at that location, - * and we may have given some of them out already. Move the iterator to the correct location using the readsAtLastPos variable - * - * @param iter the iterator - */ - private void correctForReadPileupSeek( StingSAMIterator iter ) { - // move the number of reads we read from the last pos - boolean atLeastOneReadSeen = false; // we have a problem where some chomesomes don't have a single read (i.e. the chrN_random chrom.) - for(int i = 0; i < this.readsSeenAtLastPos && iter.hasNext(); i++,iter.next()) - atLeastOneReadSeen = true; - if (readsSeenAtLastPos > 0 && !atLeastOneReadSeen) { - throw new SimpleDataSourceLoadException("Seek problem: reads at last position count != 0"); - } - } - - - /** - * set the initial iterator - * - * @param readCount the number of reads - * @param iter the merging iterator - * - * @return a bounded read iterator at the first read position in the file. - */ - private BoundedReadIterator InitialReadIterator( long readCount, CloseableIterator iter ) { - BoundedReadIterator bound; - bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount); - this.readsTaken = readCount; - return bound; - } - - /** - * Creates an iterator over the selected segment, from a resource pulled from the pool. - * @param segment Segment over which to gather reads. - * @return An iterator over just the reads in the given segment. - */ - private StingSAMIterator createIterator( DataStreamSegment segment ) { - StingSAMIterator iterator = resourcePool.iterator(segment); - StingSAMIterator malformedWrappedIterator = new MalformedSAMFilteringIterator( getHeader(), iterator, violations ); - StingSAMIterator readWrappingIterator = new ReadFormattingIterator(malformedWrappedIterator); - return readWrappingIterator; - } -} - - diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java deleted file mode 100644 index 62529e282..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.samtools.SAMFileReader; - -/** - * Abstract class that models a current state in some category of reads. - * @author hanna - * @version 0.1 - */ -abstract class ReadStreamPointer { - /** - * Describes the source of reads data. - */ - protected final Reads sourceInfo; - - /** - * Open handles to the reads info. - */ - protected final SamFileHeaderMerger headerMerger; - - public ReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) { - this.sourceInfo = sourceInfo; - this.headerMerger = headerMerger; - } - - /** - * Can this pointer access the provided segment efficiently? - * @param segment Segment to test. - * @return True if it would be quick for this segment to access the given data. - * False if accessing this data would require some sort of reinitialization. - */ - public abstract boolean canAccessSegmentEfficiently(DataStreamSegment segment); - - /** - * Close this resource, destroying all file handles. - */ - public void close() { - for (SAMFileReader reader : headerMerger.getReaders()) - reader.close(); - } - - /** - * Returns Reads data structure containing information about the reads data sources as well as - * information about how they are downsampled, sorted, and filtered - * @return - */ - public Reads getReadsInfo() { - return sourceInfo; - } - - /** - * Returns header merger: a class that keeps the mapping between original read groups and read groups - * of the merged stream; merger also provides access to the individual file readers (and hence headers - * too) maintained by the system. - * @return - */ - public SamFileHeaderMerger getHeaderMerger() { - return headerMerger; - } - - /** - * Remove an iterator from service. - * @param iterator The iterator to remove from service. Must not be null. - */ - public abstract void destroy( StingSAMIterator iterator ); - - /** - * Get a stream of all the reads that overlap a given segment. - * @param segment Segment to check for overlaps. - * @return An iterator over all reads overlapping the given segment. - */ - public abstract StingSAMIterator getReadsOverlapping( DataStreamSegment segment ); - - /** - * Get a stream of all the reads that are completely contained by a given segment. - * The segment can be mapped or unmapped. - * @param segment Segment to check for containment.. - * @return An iterator over all reads contained by the given segment. - */ - public abstract StingSAMIterator getReadsContainedBy( DataStreamSegment segment ); -} - -class MappedReadStreamPointer extends ReadStreamPointer { - - public MappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) { - super( sourceInfo, headerMerger ); - } - - /** - * MappedReadStreamPointers can access any segment efficiently. Always return true. - * @param segment Segment to test. - * @return True. - */ - public boolean canAccessSegmentEfficiently(DataStreamSegment segment) { - return true; - } - - /** - * {@inheritDoc} - */ - public void destroy( StingSAMIterator iterator ) { - iterator.close(); - } - - - /** - * {@inheritDoc} - */ - @Override - public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { - if(!(segment instanceof MappedStreamSegment)) - throw new UnsupportedOperationException("MappedReadStreamPointer cannot get reads overlapping an unmapped stream segment"); - MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; - - MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); - - // The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries - // over a given interval would occasionally not pick up the last read in that interval. - mergingIterator.queryOverlapping( mappedSegment.locus.getContig(), - (int)mappedSegment.locus.getStart(), - (int)mappedSegment.locus.getStop()+1); - - return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); - } - - /** - * {@inheritDoc} - */ - @Override - public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) { - if( !(segment instanceof MappedStreamSegment) ) - throw new StingException("Trying to access unmapped content from a mapped read stream pointer"); - MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; - MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); - // NOTE: explicitly not using the queryOverlapping hack above since, according to the above criteria, - // we'd only miss reads that are one base long when performing a contained query. - mergingIterator.queryContained( mappedSegment.locus.getContig(), - (int)mappedSegment.locus.getStart(), - (int)mappedSegment.locus.getStop()+1); - return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); - } - - /** - * Convert a mapped read stream pointer to an unmapped read stream pointer, transferring ownership - * of the underlying file handles to the new container. - * After doing this conversion, the source MappedReadStreamPointer should not be used. - * @return - */ - public UnmappedReadStreamPointer toUnmappedReadStreamPointer() { - return new UnmappedReadStreamPointer( sourceInfo, headerMerger ); - } -} - -class UnmappedReadStreamPointer extends ReadStreamPointer { - /** - * A pointer to the current position of this iterator in the read stream. - */ - private PositionTrackingIterator unmappedIterator = null; - - public UnmappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) { - super( sourceInfo, headerMerger ); - - MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); - mergingIterator.queryUnmappedReads(); - unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L ); - } - - /** - * UnmappedReadStreamPointers are streams and can therefore access 'future' reads in the file quickly, - * but reads already seen are difficult to seek to. - * @param segment Segment to test. - * @return True if this DataStreamSegment follows the current position. - */ - public boolean canAccessSegmentEfficiently(DataStreamSegment segment) { - if( !(segment instanceof UnmappedStreamSegment) ) - return false; - return unmappedIterator.getPosition() <= ((UnmappedStreamSegment)segment).position; - } - - /** - * {@inheritDoc} - */ - @Override - public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { - throw new UnsupportedOperationException("Unable to determine overlapped reads of an unmapped segment"); - } - - /** - * {@inheritDoc} - */ - @Override - public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) { - if( !(segment instanceof UnmappedStreamSegment) ) - throw new StingException("Trying to access mapped content from an unmapped read stream pointer"); - - UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment; - - // Force the iterator to the next pending position. - while(unmappedIterator.getPosition() < unmappedSegment.position) - unmappedIterator.next(); - - return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size); - } - - /** - * {@inheritDoc} - */ - @Override - public void close() { - if( unmappedIterator != null ) - unmappedIterator.close(); - super.close(); - } - - /** - * {@inheritDoc} - */ - public void destroy( StingSAMIterator iterator ) { - // Don't destroy the iterator; reuse it. - } - -} - -class EntireReadStreamPointer extends ReadStreamPointer { - /** - * Create a new pointer that can return info about the entire read stream. - * @param sourceInfo Source info for the reads. - * @param headerMerger Header merging apparatus. - * - */ - public EntireReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) { - super( sourceInfo, headerMerger ); - } - - /** - * An EntireReadStreamPointer can only efficiently access the entire file. - * @param segment Segment to test. - * @return true if requesting the entire stream. - */ - public boolean canAccessSegmentEfficiently(DataStreamSegment segment) { - return segment instanceof EntireStream; - } - - /** - * Get a stream of all the reads that overlap a given segment. - * @param segment Segment to check for overlaps. - * @return An iterator over all reads overlapping the given segment. - */ - @Override - public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { - if(!(segment instanceof EntireStream)) - throw new StingException("EntireReadStreamPointer can only get reads overlapping the entire stream."); - return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo)); - } - - /** - * Get a stream of all the reads that are completely contained by a given segment. - * @param segment Segment to check for containment.. - * @return An iterator over all reads contained by the given segment. - */ - @Override - public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) { - if(!(segment instanceof EntireStream)) - throw new StingException("EntireReadStreamPointer can only get reads contained by the entire stream."); - return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo)); - } - - /** - * {@inheritDoc} - */ - public void destroy( StingSAMIterator iterator ) { - iterator.close(); - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java deleted file mode 100644 index 38899a7c7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.picard.sam.SamFileHeaderMerger; - -import java.util.*; -import java.io.File; - -/** - * Represents a single stream of read data. Used to represent the state of the stream and determine - * whether the state of this resource is such that it can field the desired query. - * @author hanna - * @version 0.1 - */ -class ReadStreamResource { - final static boolean eagerDecode = true; - - /** - * Do all the constituent components of this ReadStreamResource have indices? - * In general, BAM files without indices are not supported, but in a few specific - * cases we do allow this for the Picard pipeline. - * @return true if all BAM files have indices; false otherwise. - */ - protected boolean hasIndex() { - for(SAMFileReader reader: readStreamPointer.getHeaderMerger().getReaders()) { - if(!reader.hasIndex()) - return false; - } - return true; - } - - protected final boolean hasIndex; - - /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(ReadStreamPointer.class); - - /** - * The (possibly merged) header for the input fileset. - */ - private final SAMFileHeader header; - - /** - * A pointer to the current location of the file. - */ - private ReadStreamPointer readStreamPointer = null; - - /** - * A mapping from original input file to merged read group record ids - */ - private Map fileToReaderMap = null; - - /** - * A mapping from reader back to the ID uniquely identifying this input file. - */ - private Map readerToIDMap = null; - - public ReadStreamResource( Reads sourceInfo ) { - SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate); - - this.header = headerMerger.getMergedHeader(); - - boolean indexPresent = true; - for(SAMFileReader reader: headerMerger.getReaders()) { - if(!reader.hasIndex()) - indexPresent = false; - } - hasIndex = indexPresent; - - if(hasIndex) - readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger); - else - readStreamPointer = new EntireReadStreamPointer(sourceInfo, headerMerger); - } - - /** - * Gets the header information for the read stream. - * @return Header information for the read stream. - */ - public SAMFileHeader getHeader() { - return header; - } - - /** - * Returns Reads data structure containing information about the reads data sources as well as - * information about how they are downsampled, sorted, and filtered - * @return the Reads object - */ - public Reads getReadsInfo() { return readStreamPointer.getReadsInfo(); } - - /** - * Returns header merger: a class that keeps the mapping between original read groups and read groups - * of the merged stream; merger also provides access to the individual file readers (and hence headers - * too) maintained by the system. - * @return the header merger - */ - public SamFileHeaderMerger getHeaderMerger() { return readStreamPointer.getHeaderMerger(); } - - public boolean canAccessSegmentEfficiently(DataStreamSegment segment) { - return readStreamPointer.canAccessSegmentEfficiently(segment); - } - - public void close() { - readStreamPointer.close(); - } - - public void destroy( StingSAMIterator iterator ) { - readStreamPointer.destroy(iterator); - } - - public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) { - if( readStreamPointer instanceof MappedReadStreamPointer && segment instanceof UnmappedStreamSegment ) - readStreamPointer = ((MappedReadStreamPointer)readStreamPointer).toUnmappedReadStreamPointer(); - return readStreamPointer.getReadsContainedBy(segment); - } - - public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { - return readStreamPointer.getReadsOverlapping(segment); - } - - public Map getFileToReaderMapping() { - return fileToReaderMap; - } - - public Map getReaderToIDMapping() { - return readerToIDMap; - } - - /** - * A private function that, given the internal file list, generates a merging construct for - * all available files. - * @param reads source information about the reads. - * @param SORT_ORDER sort order for the reads. - * @return a list of SAMFileReaders that represent the stored file names - * @throws SimpleDataSourceLoadException if the file cannot be opened. - */ - private SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER ) - throws SimpleDataSourceLoadException { - - // right now this is pretty damn heavy, it copies the file list into a reader list every time - List lst = new ArrayList(); - fileToReaderMap = new HashMap(); - readerToIDMap = new HashMap(); - for (File f : reads.getReadsFiles()) { - SAMFileReader reader = new SAMFileReader(f, eagerDecode); - fileToReaderMap.put(f, reader); - readerToIDMap.put(reader,new SAMReaderID(f)); - reader.setValidationStringency(reads.getValidationStringency()); - - final SAMFileHeader header = reader.getFileHeader(); - logger.debug(String.format("Sort order is: " + header.getSortOrder())); - - if (reader.getFileHeader().getReadGroups().size() < 1) { - //logger.warn("Setting header in reader " + f.getName()); - SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName()); - rec.setLibrary(f.getName()); - rec.setSample(f.getName()); - - reader.getFileHeader().addReadGroup(rec); - } - - lst.add(reader); - } - - return new SamFileHeaderMerger(lst,SORT_ORDER,true); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index ca13ebedb..6a3cce7fa 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -167,18 +167,6 @@ public abstract class SAMDataSource implements SimpleDataSource { */ public abstract StingSAMIterator seek(Shard shard); - /** - * If we're in by-read mode, this indicates if we want - * to see unmapped reads too. Only seeing mapped reads - * is much faster, but most BAM files have significant - * unmapped read counts. - * - * @param seeUnMappedReads true to see unmapped reads, false otherwise - */ - public void viewUnmappedReads( boolean seeUnMappedReads ) { - includeUnmappedReads = seeUnMappedReads; - } - /** * Filter reads based on user-specified criteria. * diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java deleted file mode 100644 index 9a1504ecb..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.Reads; -import org.apache.log4j.Logger; - -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileReader; - -import java.util.List; -import java.util.Map; -import java.io.File; - -/** - * Maintain a pool of resources of accessors to SAM read data. SAMFileReaders and - * headers are actually quite expensive to open, so this class manages the mechanics - * of keeping them open and reusing them. - * @author hanna - * @version 0.1 - */ -@Deprecated -class SAMResourcePool extends ResourcePool { - /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(SAMResourcePool.class); - - /** Source information about the reads. */ - protected Reads reads; - protected SamFileHeaderMerger headerMerger; - protected Map fileToReaderMap; - protected Map readerToIDMap; - - /** - * Do all the constituent BAM files have indices? We support some very limited - * cases where not all BAM files have indices available. - */ - protected final boolean hasIndex; - - /** Is this a by-reads traversal or a by-locus? */ - protected boolean queryOverlapping; - - /** File header for the combined file. */ - protected final SAMFileHeader header; - - public SAMResourcePool( Reads reads ) { - this.reads = reads; - this.queryOverlapping = true; - - ReadStreamResource streamResource = createNewResource(); - this.header = streamResource.getHeader(); - this.headerMerger = streamResource.getHeaderMerger(); - this.hasIndex = streamResource.hasIndex(); - this.fileToReaderMap = streamResource.getFileToReaderMapping(); - this.readerToIDMap = streamResource.getReaderToIDMapping(); - - // Add this resource to the pool. - this.addNewResource(streamResource); - } - - /** Get the combined header for all files in the iterator pool. */ - public SAMFileHeader getHeader() { - return header; - } - - /** - * Returns Reads data structure containing information about the reads data sources placed in this pool as well as - * information about how they are downsampled, sorted, and filtered - * @return - */ - public Reads getReadsInfo() { return reads; } - - /** - * Returns a mapping from original input files to the SAMFileReaders - * - * @return the mapping - */ - public Map getFileToReaderMapping() { - return fileToReaderMap; - } - - /** - * Returns header merger: a class that keeps the mapping between original read groups and read groups - * of the merged stream; merger also provides access to the individual file readers (and hence headers - * too) maintained by the system. - * @return - */ - public SamFileHeaderMerger getHeaderMerger() { return headerMerger; } - - protected ReadStreamResource selectBestExistingResource( DataStreamSegment segment, List resources ) { - for (ReadStreamResource resource : resources) { - if (resource.canAccessSegmentEfficiently(segment)) { - return resource; - } - } - return null; - } - - protected ReadStreamResource createNewResource() { - return new ReadStreamResource(reads); - } - - protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamResource streamResource ) { - StingSAMIterator iterator = null; - - if (!queryOverlapping) - iterator = streamResource.getReadsContainedBy(segment); - else - iterator = streamResource.getReadsOverlapping(segment); - - return new ReleasingIterator( streamResource, iterator ); - } - - protected void closeResource( ReadStreamResource resource ) { - resource.close(); - } - - private class ReleasingIterator implements StingSAMIterator { - /** - * The resource acting as the source of the data. - */ - private final ReadStreamResource resource; - - /** - * The iterator to wrap. - */ - private final StingSAMIterator wrappedIterator; - - public Reads getSourceInfo() { - return wrappedIterator.getSourceInfo(); - } - - public ReleasingIterator( ReadStreamResource resource, StingSAMIterator wrapped ) { - this.resource = resource; - this.wrappedIterator = wrapped; - } - - public ReleasingIterator iterator() { - return this; - } - - public void remove() { - throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); - } - - public void close() { - resource.destroy(wrappedIterator); - release(this); - } - - public boolean hasNext() { - return wrappedIterator.hasNext(); - } - - public SAMRecord next() { - return wrappedIterator.next(); - } - } - - public boolean isQueryOverlapping() { - return queryOverlapping; - } - - public void setQueryOverlapping( boolean queryOverlapping ) { - this.queryOverlapping = queryOverlapping; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index c427c8479..e66c86450 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -57,7 +57,7 @@ public class LinearMicroScheduler extends MicroScheduler { for (Shard shard : shardStrategy) { // New experimental code for managing locus intervals. - if(shard.getShardType() == Shard.ShardType.LOCUS || shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) { + if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; WindowMaker windowMaker = new WindowMaker(getReadIterator(shard), shard.getGenomeLocs(), walker.getMandatoryReadFilters(), lWalker.getDiscards()); for(WindowMaker.WindowMakerIterator iterator: windowMaker) { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java index 457406642..021f57f84 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/BoundedReadIterator.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; +import net.sf.picard.sam.MergingSamRecordIterator; import java.util.Iterator; @@ -87,8 +88,8 @@ public class BoundedReadIterator implements StingSAMIterator { public SAMFileHeader getHeader() { // todo: this is bad, we need an iterface out there for samrecords that supports getting the header, // regardless of the merging - if (iterator instanceof MergingSamRecordIterator2) - return ((MergingSamRecordIterator2)iterator).getHeader(); + if (iterator instanceof MergingSamRecordIterator) + return ((MergingSamRecordIterator)iterator).getMergedHeader(); else return null; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java deleted file mode 100644 index bb2caab9c..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIterator.java +++ /dev/null @@ -1,113 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; - -import java.util.Iterator; - - -/** - * - * @author aaron - * - * Class DuplicateDetectorIterator - * - * remove reads that overlap the passed in interval, yea! - * - */ -public class IntervalOverlapIterator implements StingSAMIterator { - - // our wrapped iterator - private final StingSAMIterator mIter; - private final boolean throwException; - - // storage for the next record - private SAMRecord mNextRecord = null; - - // the genomic location which we filter on - private final GenomeLoc mLoc; - - /** - * Create a DuplicateDetectorIterator from another sam iterator - * @param iter something that implements StingSAMIterator - * @param blowUpOnDup if we find a dup, do we throw an exception (blow up) or do we drop it - */ - public IntervalOverlapIterator(StingSAMIterator iter, GenomeLoc filterLocation, boolean blowUpOnDup) { - this.mIter = iter; - this.throwException = blowUpOnDup; - this.mLoc = filterLocation; - if (iter.hasNext()) { - next(); - } - } - - /** - * Gets source information for the reads. Contains information about the original reads - * files, plus information about downsampling, etc. - * - * @return - */ - public Reads getSourceInfo() { - return mIter.getSourceInfo(); - } - - /** - * close this iterator - */ - public void close() { - if (mIter != null) mIter.close(); - } - - /** - * do we have a next? - * @return true if yes, false if not - */ - public boolean hasNext() { - return (mNextRecord != null); - } - - /** - * get the next record - * @return a SAMRecord - */ - public SAMRecord next() { - SAMRecord ret = mNextRecord; - while (mIter.hasNext()) { - mNextRecord = mIter.next(); - if (!isOverlaping(mNextRecord)) return ret; - } - mNextRecord = null; - return ret; - } - - /** - * not supported - */ - public void remove() { - throw new UnsupportedOperationException("You can't call remove, so, like, I guess please don't"); - } - - /** - * create an iterator out of the this type - * @return this! - */ - public Iterator iterator() { - return this; - } - - /** - * determine if a read overlaps the specified interval that was passed in - * @param rec the read - * @return true if it overlaps, false otherwise - */ - private boolean isOverlaping(SAMRecord rec) { - boolean overlap = mLoc.overlapsP(GenomeLocParser.createGenomeLoc(rec)); - if (overlap && this.throwException) - throw new StingException("IntervalOverlapIterator found a overlapping read " + rec.getReadName() + " with overlap " + this.mLoc.toString()); - return overlap; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 203c3faf8..7f7b8da13 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -43,337 +43,6 @@ import java.util.Iterator; import java.util.List; import java.util.PriorityQueue; -/** - * Provides an iterator interface for merging multiple underlying iterators into a single - * iterable stream. The underlying iterators/files must all have the same sort order unless - * the requested output format is unsorted, in which case any combination is valid. - */ -public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, QueryIterator { - protected PriorityQueue pq = null; - protected final SamFileHeaderMerger samHeaderMerger; - protected final SAMFileHeader.SortOrder sortOrder; - protected static Logger logger = Logger.getLogger(MergingSamRecordIterator2.class); - private SAMRecord mNextRecord; - protected boolean initialized = false; - protected final Reads reads; - protected static boolean warnedUserAboutSortOrder = false; // so we only warn the user once - - /** - * Constructs a new merging iterator with the same set of readers and sort order as - * provided by the header merger parameter. - * - * @param headerMerger the header to merge - * @param reads the reads pile - */ - public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) { - this.samHeaderMerger = headerMerger; - this.reads = reads; - this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); - this.pq = new PriorityQueue(samHeaderMerger.getReaders().size()); - - } - - /** - * we hold off initializing because we don't know if they plan to query or just iterate. This function - * gets called if the hasNext() determines we haven't been query-ed yet, and we want to setup the iterator to start - * at the beginning of the read pile - */ - private void lazyInitialization() { - if (initialized) { - throw new UnsupportedOperationException("You cannot double initialize a MergingSamRecordIterator2"); - } - final SAMRecordComparator comparator = getComparator(); - for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - checkSortOrder(reader); - - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, comparator); - addIfNotEmpty(iterator); - } - setInitialized(); - - } - - /** - * verify the sort order - * - * @param reader the reader to check - */ - private void checkSortOrder(SAMFileReader reader) { - if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) { - String msg = String.format("The GATK requires your bam have %s sort order, but your BAM file header %s. Continuing beyond this point is unsafe" + - "-- please update your BAM file to have a compatible sort order using samtools sort or Picard MergeBamFiles. You may overcome this error by " + - "passing the \'-U ALLOW_UNSET_BAM_SORT_ORDER\' on the command line; before doing so make sure that your input BAM is sorted even if it can't be marked as such.", - this.sortOrder, reader.getFileHeader().getAttribute("SO") == null ? "is missing the SO sort order flag" : "has an SO flag set to " + reader.getFileHeader().getAttribute("SO")); - if (!reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.ALLOW_UNSET_BAM_SORT_ORDER)) { - throw new PicardException(msg); - } else if (!warnedUserAboutSortOrder) { - warnedUserAboutSortOrder = true; - Utils.warnUser(msg); - } - - } - } - - public boolean supportsSeeking() { - return true; - } - - public void queryOverlapping(final String contig, final int start, final int stop) { - if (initialized) { - throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); - } - final SAMRecordComparator comparator = getComparator(); - - for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - checkSortOrder(reader); - Iterator recordIter = reader.queryOverlapping(contig, start, stop); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); - addIfNotEmpty(iterator); - } - setInitialized(); - - } - - public void query(final String contig, final int start, final int stop, final boolean contained) { - if (initialized) { - throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); - } - final SAMRecordComparator comparator = getComparator(); - for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - checkSortOrder(reader); - Iterator recordIter = reader.query(contig, start, stop, contained); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); - addIfNotEmpty(iterator); - } - setInitialized(); - - } - - public void queryUnmappedReads() { - if (initialized) { - throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); - } - final SAMRecordComparator comparator = getComparator(); - for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - Iterator recordIter = null; - if (reader.hasIndex()) { - recordIter = reader.queryUnmapped(); - } else { - // HACK: Supporting completely unmapped BAM files is easy. Let's do a quick check to make sure - // these BAMs aren't partially indexed. - if (reader.getFileHeader().getSequenceDictionary().size() > 0) - throw new StingException("Partially mapped BAM files without indices are not supported"); - recordIter = reader.iterator(); - } - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); - addIfNotEmpty(iterator); - } - setInitialized(); - - } - - - public void queryContained(final String contig, final int start, final int stop) { - if (initialized) { - throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); - } - final SAMRecordComparator comparator = getComparator(); - for (final SAMFileReader reader : samHeaderMerger.getReaders()) { - checkSortOrder(reader); - Iterator recordIter = reader.queryContained(contig, start, stop); - final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(this.samHeaderMerger, reader, recordIter, comparator); - addIfNotEmpty(iterator); - } - setInitialized(); - - } - - private void setInitialized() { - initialized = true; - mNextRecord = nextRecord(); - } - - - /** Returns true if any of the underlying iterators has more records, otherwise false. */ - public boolean hasNext() { - if (!initialized) { - lazyInitialization(); - } - if (this.pq.isEmpty() && mNextRecord == null) { - return false; - } - return true; - } - - public SAMRecord next() { - SAMRecord r = mNextRecord; - if (!this.pq.isEmpty()) { - mNextRecord = nextRecord(); - } else { - mNextRecord = null; - } - return r; - } - - /** @return the next record, without moving the iterator */ - public SAMRecord peek() { - return mNextRecord; - } - - /** - * Returns the next record from the top most iterator during merging. - * - * @return the next record, null if it's unavailable - */ - public SAMRecord nextRecord() { - if (!initialized) { - lazyInitialization(); - } - - final ComparableSamRecordIterator iterator = this.pq.poll(); - - if (iterator == null) { - return null; - } - final SAMRecord record = iterator.next(); - addIfNotEmpty(iterator); - - // Fix the read group if needs be - if (this.samHeaderMerger.hasReadGroupCollisions()) { - final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); - if (oldGroupId != null ) { - final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader(), oldGroupId); - record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); - } - } - - // Fix the program group if needs be - if (this.samHeaderMerger.hasProgramGroupCollisions()) { - final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID); - if (oldGroupId != null ) { - final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader(), oldGroupId); - record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); - } - } - - // if we don't have a read group, set one correctly - if (record.getAttribute(SAMTag.RG.toString()) == null) { - List readGroups = iterator.getReader().getFileHeader().getReadGroups(); - if (readGroups.size() == 1) { - record.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId()); - record.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId()); - } else { - logger.warn("Unable to set read group of ungrouped read: unable to pick default group, there are " + readGroups.size() + " possible."); - } - } - - record.setHeader(samHeaderMerger.getMergedHeader()); - - //System.out.printf("NEXT = %s %s %d%n", record.getReadName(), record.getReferenceName(), record.getAlignmentStart()); - //System.out.printf("PEEK = %s %s %d%n", this.pq.peek().peek().getReadName(), this.pq.peek().peek().getReferenceName(), this.pq.peek().peek().getAlignmentStart()); - return record; - } - - /** - * Adds iterator to priority queue. If the iterator has more records it is added - * otherwise it is closed and not added. - * - * @param iterator the iterator to add - */ - protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) { - //System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart()); - if (iterator.hasNext()) { - pq.offer(iterator); - } else { - iterator.close(); - } - } - - /** Unsupported operation. */ - public void remove() { - throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); - } - - /** - * Get the right comparator for a given sort order (coordinate, alphabetic). In the - * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive - * ordering. - * - * @return the SAMRecordComparator appropriate for our sort order - */ - protected SAMRecordComparator getComparator() { - // For unsorted build a fake comparator that compares based on object ID - if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { - return new SAMRecordComparator() { - public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { - return System.identityHashCode(lhs) - System.identityHashCode(rhs); - } - - public int compare(final SAMRecord lhs, final SAMRecord rhs) { - return fileOrderCompare(lhs, rhs); - } - }; - } - - // Otherwise try and figure out what kind of comparator to return and build it - final Class type = this.sortOrder.getComparator(); - - try { - final Constructor ctor = type.getConstructor(SAMFileHeader.class); - //System.out.printf("Getting comparator %s%n", ctor.toGenericString()); - return ctor.newInstance(this.samHeaderMerger.getMergedHeader()); - } - catch (Exception e) { - try { - final Constructor ctor = type.getConstructor(); - return ctor.newInstance(); - } - catch (Exception e2) { - throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e2); - } - } - } - - /** - * Returns the merged header that the merging iterator is working from. - * - * @return our sam file header - */ - public SAMFileHeader getHeader() { - return this.samHeaderMerger.getMergedHeader(); - } - - - /** - * closes all open iterators....DO THIS or you will run out of handles - * with sharding. - */ - public void close() { - for (ComparableSamRecordIterator iterator : pq) - iterator.close(); - } - - - /** - * allows us to be used in the new style for loops - * - * @return this iterator, which can be used in for loop each iterations - */ - public Iterator iterator() { - return this; - } - - /** - * Gets source information for the reads. Contains information about the original reads - * files, plus information about downsampling, etc. - * - * @return the read object we were created for - */ - public Reads getSourceInfo() { - return this.reads; - } -} - // Should replace picard class with the same name class ComparableSamRecordIterator extends PeekableIterator implements Comparable, StingSAMIterator { private Reads sourceInfo; diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java deleted file mode 100644 index 8ad5477a7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/PlusOneFixIterator.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; - -import java.util.Iterator; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.gatk.Reads; - -/** - * Created by IntelliJ IDEA. - * User: aaronmckenna - * Date: Nov 4, 2009 - * Time: 10:41:02 PM - */ -public class PlusOneFixIterator implements StingSAMIterator { - /** - * our interval region - */ - private final GenomeLoc mInterval; - - /** - * our iterator - */ - private final StingSAMIterator mIterator; - - /** - * our next record - */ - private SAMRecord mNextRecord; - - /** - * This iterator filters all reads that have their start < PLUS_ONE_FIX_CONSTANT from the end of the - * interval: - *

- *

- * (this won't make sense in a non-fixed width font) - * -------------- - * interval | - * ------------------------ - * | good read| - * -------------------------------------------- - * | overlap < PLUS_ONE_FIX_CONSTANT, we drop| - * ------------------------------------------- - *

- * The problem is that we had to put a +1 on our queryOverlapping() stop position value to SAMTools - * due to an indexing problem. Other iterators don't know about this adjustment though, so calculations - * like overlap fail. This iterator eliminates them before the read is seen by other iterators. - *

- * create a plus one fix iterator, given: - * - * @param inteveral the interval we're checking - * @param iterator the iterator to draw reads from - */ - public PlusOneFixIterator(GenomeLoc inteveral, StingSAMIterator iterator) { - if (inteveral == null || iterator == null) - throw new StingException("Both parameters to PlusOneFixIterator have to != null"); - - // save off the iterator and the interval - mInterval = inteveral; - mIterator = iterator; - - // pop off the first read - if (mIterator.hasNext()) { - next(); - } - } - - - @Override - public boolean hasNext() { - return (mNextRecord != null); - } - - @Override - public SAMRecord next() { - SAMRecord ret = mNextRecord; - while (mIterator.hasNext()) { - mNextRecord = mIterator.next(); - if (!(mNextRecord.getAlignmentStart() > mInterval.getStop())) return ret; - } - mNextRecord = null; - return ret; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("REMOVE on PlusOneFixIterator is not supported"); - } - - @Override - public Reads getSourceInfo() { - return this.mIterator.getSourceInfo(); - } - - @Override - public void close() { - this.mIterator.close(); - } - - @Override - public Iterator iterator() { - return this; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java deleted file mode 100644 index a4375b68e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordComparator; -import net.sf.samtools.SAMFileReader; - -import java.util.Iterator; - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - * - * This interface indicates that the iterator is query able - */ -public interface QueryIterator extends StingSAMIterator, PeekingIterator { - - /** - * The required methods to query able - **/ - public void queryOverlapping(final String contig, final int start, final int stop); - public void query(final String contig, final int start, final int stop, final boolean contained); - public void queryUnmappedReads(); - public void queryContained(final String contig, final int start, final int stop); -} diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java index 0f2d6e84b..c06abec6b 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java @@ -7,7 +7,6 @@ import net.sf.samtools.SAMRecord; import java.util.List; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.gatk.iterators.QueryIterator; /* @@ -42,7 +41,7 @@ import org.broadinstitute.sting.gatk.iterators.QueryIterator; * to test out classes that use specific itervals. The reads returned will * all lie in order in the specified interval. */ -public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements QueryIterator { +public class ArtificialSAMQueryIterator extends ArtificialSAMIterator { // get the next positon protected int finalPos = 0; diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index dca0b2e62..634f004da 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.iterators.QueryIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.StingException; import java.io.File; @@ -203,7 +203,7 @@ public class ArtificialSAMUtils { * * @return StingSAMIterator representing the specified amount of fake data */ - public static QueryIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) { + public static StingSAMIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) { SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header); @@ -219,7 +219,7 @@ public class ArtificialSAMUtils { * * @return StingSAMIterator representing the specified amount of fake data */ - public static ArtificialSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { + public static StingSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); @@ -250,7 +250,7 @@ public class ArtificialSAMUtils { * * @return StingSAMIterator representing the specified amount of fake data */ - public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { + public static StingSAMIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyUnitTest.java deleted file mode 100755 index 87cc78c85..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardStrategyUnitTest.java +++ /dev/null @@ -1,149 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import org.junit.Test; -import org.junit.Before; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.BaseTest; -import net.sf.samtools.SAMFileHeader; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - *

- * Class ReadIntervalShardStrategyUnitTest - *

- * Tests the ReadIntervalShardStrategy class - * - * - * TODO: this test has been changed, since we now force interval shards to not subdivide if they're large, - * so you should always get one shard back, reguardless of the specified length. If this behavior changes - * back in the future, the expected number of shards should change from 1 to > 1. - * - */ -public class IntervalShardStrategyUnitTest extends BaseTest { - - private GenomeLocSortedSet mSortedSet = null; - private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); - private static final int NUMBER_OF_CHROMOSOMES = 5; - private static final int STARTING_CHROMOSOME = 1; - private static final int CHROMOSOME_SIZE = 1000; - - @Before - public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - mSortedSet = new GenomeLocSortedSet(); - } - - @Test - public void testNoExceptionOnEmpty() { - IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - } - - @Test - public void testSingleChromosomeFunctionality() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000); - mSortedSet.add(loc); - IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - int counter = 0; - Shard d = null; - while (strat.hasNext()) { - d = strat.next(); - counter++; - } - assertTrue(d instanceof IntervalShard); - assertEquals(1, counter); - } - - @Test - public void testMultipleChromosomeFunctionality() { - for (int x = 0; x < 5; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); - mSortedSet.add(loc); - } - IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - int counter = 0; - Shard d = null; - while (strat.hasNext()) { - d = strat.next(); - counter++; - } - assertTrue(d instanceof IntervalShard); - assertEquals(5, counter); - } - - @Test - public void testOddSizeShardFunctionality() { - for (int x = 0; x < 5; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); - mSortedSet.add(loc); - } - IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - int counter = 0; - while (strat.hasNext()) { - Shard d = strat.next(); - assertEquals(1,d.getGenomeLocs().size()); - assertEquals(1, d.getGenomeLocs().get(0).getStart()); - assertEquals(1000, d.getGenomeLocs().get(0).getStop()); - counter++; - } - assertEquals(5, counter); - } - - - @Test - public void testInfiniteShardSize() { - for (int x = 0; x < 5; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000); - mSortedSet.add(loc); - } - IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - int counter = 0; - while (strat.hasNext()) { - Shard d = strat.next(); - assertEquals(1,d.getGenomeLocs().size()); - assertEquals(1000, d.getGenomeLocs().get(0).getStop()); - counter++; - } - assertEquals(5, counter); - } - - @Test(expected = UnsupportedOperationException.class) - public void testRemove() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000); - mSortedSet.add(loc); - IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet,Shard.ShardType.LOCUS_INTERVAL); - strat.remove(); - } - -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardUnitTest.java deleted file mode 100755 index f0bc94200..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/IntervalShardUnitTest.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.junit.Before; -import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import net.sf.samtools.SAMFileHeader; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - *

- * Class IntervalReadShardTest - *

- * Tests for the IntervalReadShard class. - */ -public class IntervalShardUnitTest extends BaseTest { - - private IntervalShard intervalShard = null; - private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); - private static final int NUMBER_OF_CHROMOSOMES = 5; - private static final int STARTING_CHROMOSOME = 1; - private static final int CHROMOSOME_SIZE = 1000; - - @Before - public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - } - - - @Test - public void simpleReturn() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); - intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL); - assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size()); - assertTrue(intervalShard.getGenomeLocs().get(0).equals(loc)); - } - - @Test - public void ensureNotReference() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100); - intervalShard = new IntervalShard(loc,Shard.ShardType.LOCUS_INTERVAL); - assertEquals("Input parameters imply a single-locus shard",1,intervalShard.getGenomeLocs().size()); - assertTrue(intervalShard.getGenomeLocs().get(0) != loc && intervalShard.getGenomeLocs().get(0).equals(loc)); - } - -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyUnitTest.java deleted file mode 100755 index 64fb380c9..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/LinearLocusShardStrategyUnitTest.java +++ /dev/null @@ -1,122 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.BaseTest; -import org.junit.Before; -import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import net.sf.samtools.SAMFileHeader; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - *

- * Class LocusShardStrategyTest - *

- * Test for the Locus Shard Strategy - */ -public class LinearLocusShardStrategyUnitTest extends BaseTest { - - private GenomeLocSortedSet mSortedSet = null; - private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); - private static final int NUMBER_OF_CHROMOSOMES = 5; - private static final int STARTING_CHROMOSOME = 1; - private static final int CHROMOSOME_SIZE = 1000; - - @Before - public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - } - - @Test - public void testSetup() { - LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1); - int counter = 0; - while(strat.hasNext()) { - Shard d = strat.next(); - assertTrue(d instanceof LocusShard); - assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size()); - assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 499); - ++counter; - } - assertTrue(counter == 10); - } - - @Test - public void testAdjustSize() { - LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1); - strat.adjustNextShardSize(1000); - int counter = 0; - while(strat.hasNext()) { - Shard d = strat.next(); - assertTrue(d instanceof LocusShard); - assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size()); - assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 999); - ++counter; - } - assertTrue(counter == 5); - } - - - @Test - public void testUnevenSplit() { - LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, -1); - int counter = 0; - while(strat.hasNext()) { - Shard d = strat.next(); - assertTrue(d instanceof LocusShard); - assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size()); - if (counter % 2 == 0) { - assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 599); - } else { - assertTrue(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart() == 399); - } - ++counter; - } - assertTrue(counter == 10); - } - - - @Test - public void testDashMOption() { - LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, 200); - int counter = 0; - while(strat.hasNext()) { - Shard d = strat.next(); - assertTrue(d instanceof LocusShard); - assertEquals("Sharding strategy must emit single locus shards",1,d.getGenomeLocs().size()); - assertEquals(199,(d.getGenomeLocs().get(0).getStop() - d.getGenomeLocs().get(0).getStart())); - ++counter; - } - assertTrue(counter == 1); - } - -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryUnitTest.java deleted file mode 100755 index 8b0791196..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactoryUnitTest.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.fail; -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.junit.*; -import static org.junit.Assert.assertTrue; - -/** - * - * User: aaron - * Date: Apr 8, 2009 - * Time: 11:31:04 AM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - - -/** - * @author aaron - * @version 1.0 - */ -public class ShardStrategyFactoryUnitTest extends BaseTest { - - private SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE); - private static final int NUMBER_OF_CHROMOSOMES = 5; - private static final int STARTING_CHROMOSOME = 1; - private static final int CHROMOSOME_SIZE = 1000; - private GenomeLocSortedSet set = null; - - - @Before - public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - set = new GenomeLocSortedSet(); - } - - @Test - public void testReadNonInterval() { - ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100); - assertTrue(st instanceof ReadShardStrategy); - } - - @Test - public void testReadInterval() { - GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100); - set.add(l); - ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set); - assertTrue(st instanceof IntervalShardStrategy); - } - - @Test - public void testLinearNonInterval() { - ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100); - assertTrue(st instanceof LinearLocusShardStrategy); - } - - @Test - public void testLinearInterval() { - GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100); - set.add(l); - ShardStrategy st = ShardStrategyFactory.shatter(null,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100,set); - assertTrue(st instanceof LinearLocusShardStrategy); - } - -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java deleted file mode 100644 index 566c7c891..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java +++ /dev/null @@ -1,102 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator; -import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; - -import java.util.Collections; -import java.io.File; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * use this to inject into SAMDataSource for testing - */ -public class ArtificialResourcePool extends SAMResourcePool { - // How strict should we be with SAM/BAM parsing? - protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT; - - // the header - private SAMFileHeader header; - private ArtificialSAMIterator iterator; - - /** - * Track the iterator to see whether it's venturing into unmapped reads for the first - * time. If so, query straight there. Only works for query iterators. - * - * TODO: Clean up. - */ - private boolean intoUnmappedReads = false; - - public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) { - super( new Reads(Collections.emptyList()) ); - this.header = header; - this.iterator = iterator; - } - - @Override - public StingSAMIterator iterator( DataStreamSegment segment ) { - if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) { - ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator; - MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; - GenomeLoc bounds = mappedSegment.locus; - if (!this.queryOverlapping) { - queryIterator.queryContained(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop()); - } else { - queryIterator.queryOverlapping(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop()); - } - return queryIterator; - } - else if (segment instanceof UnmappedStreamSegment) { - if( !intoUnmappedReads ) { - if( iterator instanceof ArtificialSAMQueryIterator ) { - ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator; - queryIterator.queryUnmappedReads(); - } - intoUnmappedReads = true; - } - return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size); - } - else - throw new StingException("Unsupported segment type passed to test"); - } - - /** - * get the merged header - * - * @return the merged header - */ - public SAMFileHeader getHeader() { - return this.header; - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index 9e2fcca96..6546b3959 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -50,7 +50,7 @@ import java.util.List; public class SAMBAMDataSourceUnitTest extends BaseTest { private List fl; - private ReferenceSequenceFile seq; + private IndexedFastaSequenceFile seq; /** * This function does the setup of our parser, before each method call. @@ -88,8 +88,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { Reads reads = new Reads(fl); // the sharding strat. - SAMDataSource data = new IndexDrivenSAMDataSource(reads); - ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new BlockDrivenSAMDataSource(reads); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); int count = 0; try { @@ -133,8 +133,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { Reads reads = new Reads(fl); // the sharding strat. - SAMDataSource data = new IndexDrivenSAMDataSource(reads); - ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new BlockDrivenSAMDataSource(reads); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); ArrayList readcountPerShard = new ArrayList(); ArrayList readcountPerShard2 = new ArrayList(); @@ -176,8 +176,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { count = 0; // the sharding strat. - data = new IndexDrivenSAMDataSource(reads); - strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000); + data = new BlockDrivenSAMDataSource(reads); + strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); logger.debug("Pile two:"); try { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByIntervalUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByIntervalUnitTest.java deleted file mode 100644 index 3530fb068..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByIntervalUnitTest.java +++ /dev/null @@ -1,151 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; -import org.junit.Before; -import org.junit.Test; -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - *

- * Class SAMByIntervalUnitTest - *

- * Test that the SAM data source behaves well given intervals - */ -public class SAMByIntervalUnitTest extends BaseTest { - private List fl; - ShardStrategy shardStrategy; - Reads reads; - private int targetReadCount = 14; - - - // constants we use throughout the tests - protected final int READ_COUNT; - protected final int ENDING_CHROMO; - protected final int STARTING_CHROMO; - protected final int UNMAPPED_READ_COUNT; - - public SAMByIntervalUnitTest() { - READ_COUNT = 100; - ENDING_CHROMO = 10; - STARTING_CHROMO = 1; - UNMAPPED_READ_COUNT = 1000; - } - - /** - * This function does the setup of our parser, before each method call. - *

- * Called before every test case method. - */ - @Before - public void doForEachTest() { - - fl = new ArrayList(); - - // sequence - //seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta")); - //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); - - // setup the test files - fl.add(new File(validationDataLocation + "index_test.bam")); - reads = new Reads(fl); - } - - - /** run a test on data over a specific interval */ - private void testRead( int start, int stop, int readCount ) { - ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT), - ArtificialSAMUtils.mappedAndUnmappedReadIterator(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT)); - - GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); - int unmappedReadsSeen = 0; - int iterations = 0; - - IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads); - data.setResourcePool(gen); - GenomeLocSortedSet set = new GenomeLocSortedSet(); - set.add(GenomeLocParser.createGenomeLoc(0, start, stop)); - ShardStrategy strat = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, gen.getHeader().getSequenceDictionary(), UNMAPPED_READ_COUNT, set); - - StingSAMIterator iter = data.seek(strat.next()); - int count = 0; - while (iter.hasNext()) { - SAMRecord r = iter.next(); - // uncomment for debugging - System.err.println(r.getAlignmentStart() + " " + r.getAlignmentEnd()); - count++; - } - assertEquals(readCount, count); - - } - - /** - * test out that we get a single read, given the specific size - */ - @Test - public void testSingleRead() { - testRead(1,ArtificialSAMUtils.DEFAULT_READ_LENGTH,50); - } - - /** - * test out that we get the expected amount for a whole chromosome - */ - @Test - public void testChromosome() { - testRead(1, READ_COUNT, READ_COUNT); - } - - /** - * test out that we get the expected amount for a whole chromosome - */ - @Test - public void testMiddle() { - testRead(20, READ_COUNT-20,61); - } - - - private SAMFileHeader createArtificialSamHeader( int startingChr, int endingChr, int readCount, int readSize ) { - return ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, - startingChr, - readCount + readSize); - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsUnitTest.java deleted file mode 100755 index 4ed2b8d34..000000000 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsUnitTest.java +++ /dev/null @@ -1,175 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import static junit.framework.Assert.fail; -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import org.junit.Before; -import org.junit.Test; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -/** - * - * User: aaron - * Date: Apr 15, 2009 - * Time: 7:12:59 PM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - - -/** - * @author aaron - * @version 1.0 - */ -public class SAMByReadsUnitTest extends BaseTest { - - - private List fl; - ShardStrategy shardStrategy; - Reads reads; - private int targetReadCount = 14; - - /** - * This function does the setup of our parser, before each method call. - *

- * Called before every test case method. - */ - @Before - public void doForEachTest() { - fl = new ArrayList(); - - // sequence - //seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta")); - //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); - - // setup the test files - fl.add(new File(validationDataLocation + "index_test.bam")); - reads = new Reads(fl); - } - - - /** - * Test out that we can shard the file and iterate over every read - */ - @Test - public void testToUnmappedReads() { - ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 1, 50), - ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 1, 10)); - - GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); - try { - int unmappedReadsSeen = 0; - int iterations = 0; - - IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads); - data.setResourcePool(gen); - ++iterations; - StingSAMIterator ret = data.toUnmappedReads(100); - // count the reads we've gotten back - if (ret == null) { - fail("On iteration " + iterations + " we were returned a null pointer, after seeing " + unmappedReadsSeen + " reads out of a 1000"); - } - while (ret.hasNext()) { - ret.next(); - unmappedReadsSeen++; - } - assertEquals(10, unmappedReadsSeen); - } catch (SimpleDataSourceLoadException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); - } - } - - /** - * Test out that we can shard the file and iterate over every read - */ - @Test - public void testShardingOfReadsEvenSplit() { - ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 50), - ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10)); - GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); - targetReadCount = 5; - try { - int readCount = 0; - IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads); - - data.setResourcePool(gen); - shardStrategy = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount); - while (shardStrategy.hasNext()) { - StingSAMIterator ret = data.seek(shardStrategy.next()); - assertTrue(ret != null); - while (ret.hasNext()) { - ret.next(); - readCount++; - } - ret.close(); - } - assertEquals(20, readCount); - } catch (SimpleDataSourceLoadException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); - } - } - - /** - * Test out that we can shard the file and iterate over every read - */ - @Test - public void testShardingOfReadsOddRemainder() { - ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1, 1, 10, 100), - ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 1, 10, 10)); - GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary()); - targetReadCount = 3; - try { - int readCount = 0; - IndexDrivenSAMDataSource data = new IndexDrivenSAMDataSource(reads); - - - data.setResourcePool(gen); - shardStrategy = ShardStrategyFactory.shatter(data,null,ShardStrategyFactory.SHATTER_STRATEGY.READS, gen.getHeader().getSequenceDictionary(), targetReadCount); - while (shardStrategy.hasNext()) { - - - StingSAMIterator ret = data.seek(shardStrategy.next()); - assertTrue(ret != null); - while (ret.hasNext()) { - ret.next(); - readCount++; - } - ret.close(); - } - - // assert that we saw 2000 reads - assertEquals(20, readCount); - - } catch (SimpleDataSourceLoadException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - fail("testLinearBreakIterateAll: We Should not get a SimpleDataSourceLoadException"); - } - } - - private SAMFileHeader createArtificialSamHeader(int startingChr, int endingChr, int readCount, int readSize) { - return ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, - startingChr, - readCount + readSize); - } -} - diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java index 5ca8aa263..d63d40664 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java @@ -6,17 +6,9 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.Assert; diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIteratorUnitTest.java deleted file mode 100644 index bcb77702f..000000000 --- a/java/test/org/broadinstitute/sting/gatk/iterators/IntervalOverlapIteratorUnitTest.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.junit.Assert; -import static org.junit.Assert.fail; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.Iterator; - - -/** - * @author aaron - *

- * Class DuplicateDetectorIteratorTest - *

- * test the DuplicateDetectorIterator class. - */ -public class IntervalOverlapIteratorUnitTest extends BaseTest { - private final File bam = new File(validationDataLocation + "index_test.bam"); - private static IndexedFastaSequenceFile seq; - private int chromosomeOneReadCount = 885; - - //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); - /** - * This function does the setup of our parser, before each method call. - *

- * Called before every test case method. - */ - @BeforeClass - public static void beforeAll() { - try { - seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - } catch (FileNotFoundException e) { - fail("Unexpected Exception" + e.getLocalizedMessage()); - } - GenomeLocParser.setupRefContigOrdering(seq); - } - - @Test - public void testOverlappingIntervals() { - int countOfReads = 0; - int seqLength = seq.getSequenceDictionary().getSequence("chr1").getSequenceLength(); - GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, 470535); - - // first count the initial pile of reads - SAMFileReader reader = new SAMFileReader(bam, true); - reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT); - Iterator i = reader.queryOverlapping("chr1",1,470535); - GenomeLoc newLoc; - while (i.hasNext()) { - i.next(); - countOfReads++; - } - reader.close(); - while (last.getStart() < seq.getSequenceDictionary().getSequence("chr1").getSequenceLength()) { - reader = new SAMFileReader(bam, true); - long stop = (last.getStop() >= seqLength) ? seqLength : last.getStop() + 470535; - newLoc = GenomeLocParser.createGenomeLoc(last.getContigIndex(),last.getStart()+470535,stop); - reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT); - i = reader.queryOverlapping(newLoc.getContig(),(int)newLoc.getStart(),(int)newLoc.getStop()); - IntervalOverlapIterator iter = new IntervalOverlapIterator(StingSAMIteratorAdapter.adapt(null, i),last,false); - while(iter.hasNext()) { - countOfReads++; - iter.next(); - } - last = newLoc; - reader.close(); - - } - Assert.assertEquals(chromosomeOneReadCount,countOfReads); - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorUnitTest.java deleted file mode 100644 index 9e63b4d03..000000000 --- a/java/test/org/broadinstitute/sting/gatk/iterators/PlusOneFixIteratorUnitTest.java +++ /dev/null @@ -1,120 +0,0 @@ -package org.broadinstitute.sting.gatk.iterators; - -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.BaseTest; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.Assert; -import static org.junit.Assert.fail; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.Iterator; - -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: aaronmckenna - * Date: Nov 4, 2009 - * Time: 11:02:24 PM - */ -public class PlusOneFixIteratorUnitTest extends BaseTest { - private final File bam = new File(validationDataLocation + "index_test.bam"); - private static IndexedFastaSequenceFile seq; - private int chromosomeOneReadCount = 885; - - //GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary()); - /** - * This function does the setup of our parser, before each method call. - *

- * Called before every test case method. - */ - @BeforeClass - public static void beforeAll() { - try { - seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - } catch (FileNotFoundException e) { - fail("Unexpected Exception" + e.getLocalizedMessage()); - } - GenomeLocParser.setupRefContigOrdering(seq); - } - - /** - * there's a read starting at chr1:1108664, make our interval go from chr1 to 1108664, apply the iterator, - * and we shouldn't see the read. - */ - @Test - public void testReadAtEndOfInterval() { - int countOfReads = 0; - SAMFileReader reader = new SAMFileReader(bam, true); - final int size = 1108663; - reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT); - - GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size); - Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, 1108663))); - //Iterator i = reader.queryOverlapping("chr1", 1, size); - while (i.hasNext()) { - SAMRecord rec = i.next(); - countOfReads++; - } - Assert.assertEquals(2, countOfReads); - } - - /** - * there are 4296 reads in chr1, make sure we see them all. - * - * chr1 length = 247249719 (and no reads start at the last position - */ - @Test - public void testAllReads() { - int countOfReads = 0; - SAMFileReader reader = new SAMFileReader(bam, true); - reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT); - final int size = 247249719; - GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size); - Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", 1, size))); - //Iterator i = reader.queryOverlapping("chr1", 1, size); - while (i.hasNext()) { - SAMRecord rec = i.next(); - countOfReads++; - } - Assert.assertEquals(885, countOfReads); - } - - /** - * there are 4296 reads in chr1, make sure we see them all. - * - * chr1 length = 247249719 (and no reads start at the last position - */ - @Test - public void testAllReadsSharded() { - int countOfReads = 0; - - final int size = 247249719; - int currentStop = 0; - int incr = 100000; - GenomeLoc last = GenomeLocParser.createGenomeLoc("chr1", 1, size); - while (currentStop < size) { - SAMFileReader reader = new SAMFileReader(bam, true); - reader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT); - int lastStop = (currentStop > 1) ? currentStop: 1; - if (currentStop + incr < size) - currentStop += incr; - else - currentStop = size; - Iterator i = new PlusOneFixIterator(last, StingSAMIteratorAdapter.adapt(null, reader.queryOverlapping("chr1", lastStop, currentStop))); - - while (i.hasNext()) { - SAMRecord rec = i.next(); - countOfReads++; - } - reader.close(); - } - Assert.assertEquals(885, countOfReads); - } -} - diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index f26c026e1..4ed8a790b 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource; import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -103,54 +103,6 @@ public class TraverseReadsUnitTest extends BaseTest { } - - /** Test out that we can shard the file and iterate over every read */ - @Test - public void testMappedReadCount() { - - IndexedFastaSequenceFile ref = null; - try { - ref = new IndexedFastaSequenceFile(refFile); - } - catch (FileNotFoundException ex) { - throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex); - } - GenomeLocParser.setupRefContigOrdering(ref); - - SAMDataSource dataSource = new IndexDrivenSAMDataSource(new Reads(bamList)); - dataSource.viewUnmappedReads(false); - ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS, - ref.getSequenceDictionary(), - readSize); - - countReadWalker.initialize(); - Object accumulator = countReadWalker.reduceInit(); - - while (shardStrategy.hasNext()) { - Shard shard = shardStrategy.next(); - - if (shard == null) { - fail("Shard == null"); - } - - ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null); - accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); - dataProvider.close(); - - } - - traversalEngine.printOnTraversalDone("reads", accumulator); - countReadWalker.onTraversalDone(accumulator); - - if (!(accumulator instanceof Integer)) { - fail("Count read walker should return an interger."); - } - if (((Integer) accumulator) != 9721) { - fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) ); - } - } - - /** Test out that we can shard the file and iterate over every read */ @Test public void testUnmappedReadCount() { @@ -163,9 +115,8 @@ public class TraverseReadsUnitTest extends BaseTest { } GenomeLocParser.setupRefContigOrdering(ref); - SAMDataSource dataSource = new IndexDrivenSAMDataSource(new Reads(bamList)); - dataSource.viewUnmappedReads(true); - ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS, + SAMDataSource dataSource = new BlockDrivenSAMDataSource(new Reads(bamList)); + ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), readSize);