diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 8801875cc..062c05b31 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -35,6 +35,8 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; @@ -571,11 +573,26 @@ public class GenomeAnalysisEngine { ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals, Integer maxIterations) { - long SHARD_SIZE = 100000L; + if(!readsDataSource.hasIndex()) { + if(!getArguments().unsafe || intervals != null) + throw new StingException("The GATK cannot currently process unindexed BAM files"); + + Shard.ShardType shardType; + if(walker instanceof LocusWalker) + shardType = Shard.ShardType.LOCUS; + else if(walker instanceof ReadWalker || walker instanceof DuplicateWalker) + shardType = Shard.ShardType.READ; + else + throw new StingException("The GATK cannot currently process unindexed BAM files"); + + return new MonolithicShardStrategy(shardType); + } ShardStrategy shardStrategy = null; ShardStrategyFactory.SHATTER_STRATEGY shardType; + long SHARD_SIZE = 100000L; + if (walker instanceof LocusWalker) { if (walker instanceof RodWalker) SHARD_SIZE *= 1000; diff --git 
a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index b1eae4a0a..ad9ed191b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -97,10 +97,11 @@ public class LocusReferenceView extends ReferenceView { windowStop = 0; } - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - - initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + if(bounds != null) { + long expandedStart = getWindowStart( bounds ); + long expandedStop = getWindowStop( bounds ); + initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + } } /** @@ -120,7 +121,16 @@ public class LocusReferenceView extends ReferenceView { validateLocation( genomeLoc ); GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); - char[] bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)(window.getStart() - getWindowStart(bounds)), (int)window.size() ).toCharArray(); + char[] bases = null; + + if(bounds != null) { + bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)(window.getStart() - getWindowStart(bounds)), (int)window.size() ).toCharArray(); + } + else { + if(referenceSequence == null || referenceSequence.getContigIndex() != genomeLoc.getContigIndex()) + referenceSequence = reference.getSequence(genomeLoc.getContig()); + bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)window.getStart()-1, (int)window.size()).toCharArray(); + } return new ReferenceContext( genomeLoc, window, bases ); } @@ -139,11 +149,10 @@ public class LocusReferenceView extends ReferenceView { * @param 
genomeLoc location to verify. */ private void validateLocation( GenomeLoc genomeLoc ) throws InvalidPositionException { - // if( !genomeLoc.isSingleBP() ) throw new InvalidPositionException( String.format("Requested position larger than one base; start = %d, stop = %d", genomeLoc.getStart(), genomeLoc.getStop())); - if( !bounds.containsP(genomeLoc) ) + if( bounds != null && !bounds.containsP(genomeLoc) ) throw new InvalidPositionException( String.format("Requested position %s not within interval %s", genomeLoc, bounds)); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 4e02f777d..576fb32eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -4,7 +4,6 @@ import net.sf.picard.filter.FilteringIterator; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.LocusIterator; @@ -112,7 +111,7 @@ public abstract class LocusView extends LocusIterator implements View { * @return True if another locus context is bounded by this shard. */ protected boolean hasNextLocus() { - return nextLocus != null && !nextLocus.getLocation().isPast(shard.getGenomeLoc()); + return nextLocus != null && (shard.getGenomeLoc() == null || !nextLocus.getLocation().isPast(shard.getGenomeLoc())); } /** @@ -121,7 +120,7 @@ public abstract class LocusView extends LocusIterator implements View { * @throw NoSuchElementException if the next element is missing. 
*/ protected AlignmentContext nextLocus() { - if( nextLocus == null || nextLocus.getLocation().isPast(shard.getGenomeLoc()) ) + if( nextLocus == null || (shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc())) ) throw new NoSuchElementException("No more elements remain in locus context queue."); // Cache the current and apply filtering. @@ -132,7 +131,7 @@ public abstract class LocusView extends LocusIterator implements View { nextLocus = loci.next(); if( sourceInfo.getDownsampleToCoverage() != null ) current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); - if( nextLocus.getLocation().isPast(shard.getGenomeLoc()) ) + if( shard.getGenomeLoc() != null && nextLocus.getLocation().isPast(shard.getGenomeLoc()) ) nextLocus = null; } else @@ -149,13 +148,16 @@ public abstract class LocusView extends LocusIterator implements View { if( loci.hasNext() ) nextLocus = loci.next(); - // Iterate past cruft at the beginning to the first locus in the shard. - while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() ) - nextLocus = loci.next(); + // If the location of this shard is available, trim the data stream to match the shard. + if(shard.getGenomeLoc() != null) { + // Iterate past cruft at the beginning to the first locus in the shard. + while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) && loci.hasNext() ) + nextLocus = loci.next(); - // If nothing in the shard was found, indicate that by setting nextAlignmentContext to null. - if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) ) - nextLocus = null; + // If nothing in the shard was found, indicate that by setting nextLocus to null.
+ if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLoc()) ) + nextLocus = null; + } } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java index d993016d5..b49350379 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java @@ -61,4 +61,13 @@ public class LocusShard implements Shard { public static LocusShard toShard(GenomeLoc loc) { return new LocusShard(loc); } + + /** + * String representation of this shard. + * @return A string representation of the boundaries of this shard. + */ + @Override + public String toString() { + return mLoc.toString(); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java new file mode 100644 index 000000000..e84d91faf --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java @@ -0,0 +1,51 @@ +package org.broadinstitute.sting.gatk.datasources.shards; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; + +/** + * A single, monolithic shard bridging all available data. + * @author mhanna + * @version 0.1 + */ +public class MonolithicShard implements Shard { + /** + * What type of MonolithicShard is this? Read or locus? + */ + private ShardType shardType; + + /** + * Creates a new monolithic shard of the given type. + * @param shardType Type of the shard. Must be either read or locus; cannot be intervalic. 
+ */ + public MonolithicShard(ShardType shardType) { + if(shardType != ShardType.LOCUS && shardType != ShardType.READ) + throw new StingException("Invalid shard type for monolithic shard: " + shardType); + this.shardType = shardType; + } + + /** + * Returns null, indicating that (in this case) the entire genome is covered. + * @return null. + */ + public GenomeLoc getGenomeLoc() { + return null; + } + + /** + * Reports the type of monolithic shard. + * @return Type of monolithic shard. + */ + public ShardType getShardType() { + return shardType; + } + + /** + * String representation of this shard. + * @return "entire genome". + */ + @Override + public String toString() { + return "entire genome"; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java new file mode 100644 index 000000000..b90f3d147 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java @@ -0,0 +1,76 @@ +package org.broadinstitute.sting.gatk.datasources.shards; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Create a giant shard representing all the data in the input BAM(s). + * + * @author mhanna + * @version 0.1 + */ +public class MonolithicShardStrategy implements ShardStrategy { + /** + * The single shard associated with this sharding strategy. + */ + private MonolithicShard shard; + + /** + * Create a new shard strategy for shards of the given type. + * @param shardType The shard type. + */ + public MonolithicShardStrategy(Shard.ShardType shardType) { + shard = new MonolithicShard(shardType); + } + + /** + * Convenience for using in a foreach loop. Will NOT create a new, reset instance of the iterator; + * will only return another copy of the active iterator. 
+ * @return A copy of this. + */ + public Iterator iterator() { + return this; + } + + /** + * Returns true if the monolithic shard has not yet been consumed, or false otherwise. + * @return True if the shard has not yet been consumed, false otherwise. + */ + public boolean hasNext() { + return shard != null; + } + + /** + * Returns the monolithic shard if it has not already been retrieved. + * @return The monolithic shard. + * @throws NoSuchElementException if the shard has already been retrieved. + */ + public Shard next() { + if(shard == null) + throw new NoSuchElementException("Monolithic shard has already been retrived."); + + Shard working = shard; + shard = null; + return working; + } + + /** + * Mandated by the interface, but is unsupported in this context. Will throw an exception always. + */ + public void remove() { + throw new UnsupportedOperationException("Cannot remove from a shard strategy"); + } + + /** + * Mandated by the interface, but is unsupported in this context. Will throw an exception always. + * @param size adjust the next size to this + */ + public void adjustNextShardSize( long size ) { + throw new UnsupportedOperationException("Cannot adjust the next size of a monolithic shard; there will be no next shard."); + } + +} + diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java index 26ebf7b53..e66aceabc 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java @@ -34,7 +34,11 @@ import org.broadinstitute.sting.utils.GenomeLocSortedSet; */ public class ShardStrategyFactory { public enum SHATTER_STRATEGY { - LINEAR, EXPONENTIAL, READS, INTERVAL + LINEAR, + EXPONENTIAL, + READS, + INTERVAL, + MONOLITHIC // Put all of the available data into one shard.
} /** our log, which we want to capture anything from this class */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java index c69b74b6f..60e7552b2 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java @@ -98,7 +98,7 @@ abstract class ReadStreamPointer { * @param segment Segment to check for overlaps. * @return An iterator over all reads overlapping the given segment. */ - public abstract StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ); + public abstract StingSAMIterator getReadsOverlapping( DataStreamSegment segment ); /** * Get a stream of all the reads that are completely contained by a given segment. @@ -136,14 +136,18 @@ class MappedReadStreamPointer extends ReadStreamPointer { * {@inheritDoc} */ @Override - public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) { + public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { + if(!(segment instanceof MappedStreamSegment)) + throw new UnsupportedOperationException("MappedReadStreamPointer cannot get reads overlapping an unmapped stream segment"); + MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; + MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); // The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries // over a given interval would occasionally not pick up the last read in that interval. 
- mergingIterator.queryOverlapping( segment.locus.getContig(), - (int)segment.locus.getStart(), - (int)segment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); + mergingIterator.queryOverlapping( mappedSegment.locus.getContig(), + (int)mappedSegment.locus.getStart(), + (int)mappedSegment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); } @@ -206,7 +210,7 @@ class UnmappedReadStreamPointer extends ReadStreamPointer { * {@inheritDoc} */ @Override - public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) { + public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { throw new UnsupportedOperationException("Unable to determine overlapped reads of an unmapped segment"); } @@ -244,4 +248,57 @@ class UnmappedReadStreamPointer extends ReadStreamPointer { // Don't destroy the iterator; reuse it. } +} + +class EntireReadStreamPointer extends ReadStreamPointer { + /** + * Create a new pointer that can return info about the entire read stream. + * @param sourceInfo Source info for the reads. + * @param headerMerger Header merging apparatus. + * + */ + public EntireReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) { + super( sourceInfo, headerMerger ); + } + + /** + * An EntireReadStreamPointer can only efficiently access the entire file. + * @param segment Segment to test. + * @return true if requesting the entire stream. + */ + public boolean canAccessSegmentEfficiently(DataStreamSegment segment) { + return segment instanceof EntireStream; + } + + /** + * Get a stream of all the reads that overlap a given segment. + * @param segment Segment to check for overlaps. + * @return An iterator over all reads overlapping the given segment. 
+ */ + @Override + public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { + if(!(segment instanceof EntireStream)) + throw new StingException("EntireReadStreamPointer can only get reads overlapping the entire stream."); + return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo)); + } + + /** + * Get a stream of all the reads that are completely contained by a given segment. + * @param segment Segment to check for containment; must be an EntireStream. + * @return An iterator over all reads contained by the given segment. + */ + @Override + public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) { + if(!(segment instanceof EntireStream)) + throw new StingException("EntireReadStreamPointer can only get reads contained by the entire stream."); + return StingSAMIteratorAdapter.adapt(sourceInfo,new MergingSamRecordIterator2(headerMerger, sourceInfo)); + } + + /** + * {@inheritDoc} + */ + public void destroy( StingSAMIterator iterator ) { + iterator.close(); + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java index 1eee99679..f11169a3e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.StingException; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMReadGroupRecord; @@ -47,6 +46,22 @@ import java.io.File; class ReadStreamResource { final static boolean eagerDecode =
true; + /** + * Do all the constituent components of this ReadStreamResource have indices? + * In general, BAM files without indices are not supported, but in a few specific + * cases we do allow this for the Picard pipeline. + * @return true if all BAM files have indices; false otherwise. + */ + protected boolean hasIndex() { + for(SAMFileReader reader: readStreamPointer.getHeaderMerger().getReaders()) { + if(!reader.hasIndex()) + return false; + } + return true; + } + + protected final boolean hasIndex; + /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(ReadStreamPointer.class); @@ -64,7 +79,18 @@ class ReadStreamResource { SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate); this.header = headerMerger.getMergedHeader(); - readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger); + + boolean indexPresent = true; + for(SAMFileReader reader: headerMerger.getReaders()) { + if(!reader.hasIndex()) + indexPresent = false; + } + hasIndex = indexPresent; + + if(hasIndex) + readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger); + else + readStreamPointer = new EntireReadStreamPointer(sourceInfo, headerMerger); } /** @@ -109,7 +135,7 @@ class ReadStreamResource { } - public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) { + public StingSAMIterator getReadsOverlapping( DataStreamSegment segment ) { return readStreamPointer.getReadsOverlapping(segment); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 5a9e61068..9f077527e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -159,6 +159,12 @@ abstract class 
ResourcePool { interface DataStreamSegment { } +/** + * Models the entire stream of data. + */ +class EntireStream implements DataStreamSegment { +} + /** * Models a mapped position within a stream of GATK input data. */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 2dde22630..4c68c7c55 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -10,6 +10,7 @@ import net.sf.picard.sam.SamFileHeaderMerger; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.GenomeLoc; @@ -112,6 +113,15 @@ public class SAMDataSource implements SimpleDataSource { resourcePool = new SAMResourcePool(reads); } + /** + * Do all BAM files backing this data source have an index? The case where hasIndex() is false + * is supported, but only in a few extreme cases. + * @return True if an index is present; false otherwise. + */ + public boolean hasIndex() { + return resourcePool.hasIndex; + } + /** * Gets the (potentially merged) SAM file header. 
* @@ -150,14 +160,14 @@ public class SAMDataSource implements SimpleDataSource { StingSAMIterator iterator = null; if (shard.getShardType() == Shard.ShardType.READ) { - iterator = seekRead((ReadShard) shard); + iterator = seekRead(shard); iterator = applyDecoratingIterators(true, iterator, reads.getDownsamplingFraction(), reads.getSafetyChecking(), reads.getSupplementalFilters()); } else if (shard.getShardType() == Shard.ShardType.LOCUS) { - iterator = seekLocus(shard.getGenomeLoc()); + iterator = seekLocus(shard); iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), @@ -165,7 +175,7 @@ public class SAMDataSource implements SimpleDataSource { reads.getSupplementalFilters()); } else if ((shard.getShardType() == Shard.ShardType.LOCUS_INTERVAL) || (shard.getShardType() == Shard.ShardType.READ_INTERVAL)) { - iterator = seekLocus(shard.getGenomeLoc()); + iterator = seekLocus(shard); iterator = applyDecoratingIterators(false, iterator, reads.getDownsamplingFraction(), @@ -190,17 +200,19 @@ public class SAMDataSource implements SimpleDataSource { * seekLocus *

* - * @param location the genome location to extract data for + * @param shard the shard containing the genome location to extract data for * * @return an iterator for that region */ - private StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException { + private StingSAMIterator seekLocus( Shard shard ) throws SimpleDataSourceLoadException { + if(shard instanceof MonolithicShard) + return createIterator(new EntireStream()); + if( getHeader().getSequenceDictionary().getSequences().size() == 0 ) throw new StingException("Unable to seek to the given locus; reads data source has no alignment information."); - return createIterator( new MappedStreamSegment(location) ); + return createIterator( new MappedStreamSegment(shard.getGenomeLoc()) ); } - /** *

* seek @@ -210,7 +222,11 @@ public class SAMDataSource implements SimpleDataSource { * * @return an iterator for that region */ - private StingSAMIterator seekRead( ReadShard shard ) throws SimpleDataSourceLoadException { + private StingSAMIterator seekRead( Shard shard ) throws SimpleDataSourceLoadException { + if(shard instanceof MonolithicShard) + return createIterator(new EntireStream()); + + ReadShard readShard = (ReadShard)shard; StingSAMIterator iter = null; // If there are no entries in the sequence dictionary, there can't possibly be any unmapped reads. Force state to 'unmapped'. @@ -221,22 +237,22 @@ public class SAMDataSource implements SimpleDataSource { if (lastReadPos == null) { lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE); iter = createIterator(new MappedStreamSegment(lastReadPos)); - return InitialReadIterator(shard.getSize(), iter); + return InitialReadIterator(readShard.getSize(), iter); } else { lastReadPos = GenomeLocParser.setStop(lastReadPos,-1); - iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos)))); + iter = fastMappedReadSeek(readShard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos)))); } if (intoUnmappedReads && !includeUnmappedReads) - shard.signalDone(); + readShard.signalDone(); } if (intoUnmappedReads && includeUnmappedReads) { if (iter != null) iter.close(); - iter = toUnmappedReads(shard.getSize()); + iter = toUnmappedReads(readShard.getSize()); if (!iter.hasNext()) - shard.signalDone(); + readShard.signalDone(); } return iter; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java index b2ac4ea71..b10c73adf 100644 --- 
a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java @@ -44,18 +44,24 @@ import java.util.List; * @version 0.1 */ class SAMResourcePool extends ResourcePool { + /** our log, which we want to capture anything from this class */ + protected static Logger logger = Logger.getLogger(SAMResourcePool.class); + /** Source information about the reads. */ protected Reads reads; protected SamFileHeaderMerger headerMerger; + /** + * Do all the constituent BAM files have indices? We support some very limited + * cases where not all BAM files have indices available. + */ + protected final boolean hasIndex; + /** Is this a by-reads traversal or a by-locus? */ protected boolean queryOverlapping; /** File header for the combined file. */ - protected SAMFileHeader header; - - /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(SAMResourcePool.class); + protected final SAMFileHeader header; public SAMResourcePool( Reads reads ) { this.reads = reads; @@ -64,6 +70,8 @@ class SAMResourcePool extends ResourcePool ReadStreamResource streamResource = createNewResource(); this.header = streamResource.getHeader(); this.headerMerger = streamResource.getHeaderMerger(); + this.hasIndex = streamResource.hasIndex(); + // Add this resource to the pool. 
this.addNewResource(streamResource); } @@ -106,11 +114,8 @@ class SAMResourcePool extends ResourcePool if (!queryOverlapping) iterator = streamResource.getReadsContainedBy(segment); - else { - if (!( segment instanceof MappedStreamSegment )) - throw new StingException("Segment is unmapped; true overlaps cannot be determined."); - iterator = streamResource.getReadsOverlapping((MappedStreamSegment) segment); - } + else + iterator = streamResource.getReadsOverlapping(segment); return new ReleasingIterator( streamResource, iterator ); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 6f5d65a0c..d0cf22977 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -40,7 +40,7 @@ public class TraverseLoci extends TraversalEngine { Shard shard, ShardDataProvider dataProvider, T sum ) { - logger.debug(String.format("TraverseLoci.traverse Genomic interval is %s", shard.getGenomeLoc())); + logger.debug(String.format("TraverseLoci.traverse: Shard is %s", shard)); if ( !(walker instanceof LocusWalker) ) throw new IllegalArgumentException("Walker isn't a loci walker!");