diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java index 60e7552b2..269d8ae84 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamPointer.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.samtools.SAMFileReader; @@ -145,9 +146,10 @@ class MappedReadStreamPointer extends ReadStreamPointer { // The getStop() + 1 is a hack to work around an old bug in the way Picard created SAM files where queries // over a given interval would occasionally not pick up the last read in that interval. - mergingIterator.queryOverlapping( mappedSegment.locus.getContig(), - (int)mappedSegment.locus.getStart(), - (int)mappedSegment.locus.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); + GenomeLoc bounds = mappedSegment.getBounds(); + mergingIterator.queryOverlapping( bounds.getContig(), + (int)bounds.getStart(), + (int)bounds.getStop()+ PlusOneFixIterator.PLUS_ONE_FIX_CONSTANT); return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); } @@ -163,9 +165,10 @@ class MappedReadStreamPointer extends ReadStreamPointer { MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo ); // NOTE: explicitly not using the queryOverlapping hack above since, according to the above criteria, // we'd only miss reads that are one base long when performing a contained query. - mergingIterator.queryContained( mappedSegment.locus.getContig(), - (int)mappedSegment.locus.getStart(), - (int)mappedSegment.locus.getStop()+1); + GenomeLoc bounds = mappedSegment.getBounds(); + mergingIterator.queryContained( bounds.getContig(), + (int)bounds.getStart(), + (int)bounds.getStop()+1); return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index fa75f165c..9204d92cc 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -67,7 +67,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * @return Iterator through the data. */ public Iterator seek( Shard shard ) { - SeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(shard.getGenomeLocs()) ); + DataStreamSegment dataStreamSegment = shard.getGenomeLocs().size() != 0 ? new MappedStreamSegment(shard.getGenomeLocs()) : new EntireStream(); + SeekableRODIterator iterator = iteratorPool.iterator(dataStreamSegment); return iterator; } @@ -108,20 +109,27 @@ class ReferenceOrderedDataPool extends ResourcePool resources ) { - if( !(segment instanceof MappedStreamSegment) ) - throw new StingException("Reference-ordered data cannot utilitize unmapped segments."); - - GenomeLoc position = ((MappedStreamSegment)segment).locus; - //######################################### + if(segment instanceof MappedStreamSegment) { + GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); + //######################################### //## System.out.printf("Searching for iterator at locus %s; %d resources available%n", position, resources.size()); - for( SeekableRODIterator iterator: resources ) { + for( SeekableRODIterator iterator: resources ) { //##System.out.printf("Examining iterator at position %s [last query location: %s]%n", iterator.position(),iterator.lastQueryLocation()); - if( (iterator.position() == null && iterator.hasNext()) || - (iterator.position() != null && iterator.position().isBefore(position)) ) - return iterator; - } + if( (iterator.position() == null && iterator.hasNext()) || + (iterator.position() != null && iterator.position().isBefore(position)) ) + return iterator; + } //##System.out.printf("Failed to find iterator at locus %s%n", position); - return null; + return null; + } + else if(segment instanceof EntireStream) { + // Asking for a segment over the entire stream, so by definition, there is no best existing resource. + // Force the system to create a new one. + return null; + } + else { + throw new StingException("Unable to find a ROD iterator for segments of type " + segment.getClass()); + } } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java index 776e09446..7ab281f9e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ResourcePool.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.List; import java.util.ArrayList; @@ -169,11 +170,39 @@ class EntireStream implements DataStreamSegment { * Models a mapped position within a stream of GATK input data. */ class MappedStreamSegment implements DataStreamSegment { - public final GenomeLoc locus; + public final List loci; + + /** + * Retrieves the first location covered by a mapped stream segment. + * @return Location of the first base in this segment. + */ + public GenomeLoc getFirstLocation() { + GenomeLoc firstLocus = loci.get(0); + return GenomeLocParser.createGenomeLoc(firstLocus.getContigIndex(),firstLocus.getStart()); + } + + /** + * Get the total range of the given mapped stream segment. + * @return A GenomeLoc consisting of the first base of the first locus to the last base of the last locus, inclusive. + */ + public GenomeLoc getBounds() { + GenomeLoc firstLocus = loci.get(0); + GenomeLoc lastLocus = loci.get(loci.size()-1); + return GenomeLocParser.createGenomeLoc(getFirstLocation().getContigIndex(),firstLocus.getStart(),lastLocus.getStop()); + } + public MappedStreamSegment( List loci ) { - if(loci.size() > 1) - throw new StingException("MappedStreamSegments cannot apply to a range of loci"); - this.locus = !loci.isEmpty() ? loci.get(0) : null; + // Validate that the list of loci is non-empty. + if(loci.size() == 0) + throw new StingException("Cannot map to a locus of length 0."); + + // Validate that all loci in the given list are from the same contig. + int contigIndex = loci.get(0).getContigIndex(); + for(GenomeLoc locus: loci) { + if(contigIndex != locus.getContigIndex()) + throw new StingException("All loci in a MappedStreamSegment must be on the same contig."); + } + this.loci = loci; } } diff --git a/java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java b/java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java index 94e5187df..9d4d62892 100644 --- a/java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/examples/HelloWalker.java @@ -36,12 +36,12 @@ public class HelloWalker extends LocusWalker { * which they fall, and the base from the reference that overlaps. * @param tracker The accessor for reference metadata. * @param ref The reference base that lines up with this locus. - * @param locus Information about reads overlapping this locus. + * @param context Information about reads aligning to this locus. * @return In this case, returns a count of how many loci were seen at this site (1). */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - out.printf("Hello locus %s; your ref base is %c and you have %d reads%n", context.getLocation(), ref.getBase(), context.getReads().size() ); + out.printf("Hello locus %s; your ref base is %c and you have %d reads%n", context.getLocation(), ref.getBase(), context.getBasePileup().size() ); return 1; } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java index 0aa799cec..6cbc5b070 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ArtificialResourcePool.java @@ -5,6 +5,7 @@ import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator; import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; @@ -68,10 +69,11 @@ public class ArtificialResourcePool extends SAMResourcePool { if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) { ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator; MappedStreamSegment mappedSegment = (MappedStreamSegment)segment; + GenomeLoc bounds = mappedSegment.getBounds(); if (!this.queryOverlapping) { - queryIterator.queryContained(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop()); + queryIterator.queryContained(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop()); } else { - queryIterator.queryOverlapping(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop()); + queryIterator.queryOverlapping(bounds.getContig(), (int)bounds.getStart(), (int)bounds.getStop()); } return queryIterator; }