diff --git a/java/src/net/sf/picard/sam/MergingSamRecordIterator.java b/java/src/net/sf/picard/sam/MergingSamRecordIterator.java index 469b93b5e..6160d1301 100644 --- a/java/src/net/sf/picard/sam/MergingSamRecordIterator.java +++ b/java/src/net/sf/picard/sam/MergingSamRecordIterator.java @@ -36,7 +36,7 @@ import net.sf.samtools.util.CloseableIterator; * iterable stream. The underlying iterators/files must all have the same sort order unless * the requested output format is unsorted, in which case any combination is valid. */ -public class MergingSamRecordIterator implements Iterator { +public class MergingSamRecordIterator implements CloseableIterator { private final PriorityQueue pq; private final SamFileHeaderMerger samHeaderMerger; private final SAMFileHeader.SortOrder sortOrder; @@ -44,7 +44,7 @@ public class MergingSamRecordIterator implements Iterator { /** * Maps iterators back to the readers from which they are derived. */ - private final Map,SAMFileReader> iteratorToSourceMap = new HashMap,SAMFileReader>(); + private final Map,SAMFileReader> iteratorToSourceMap = new HashMap,SAMFileReader>(); /** * Constructs a new merging iterator with the same set of readers and sort order as @@ -94,6 +94,15 @@ public class MergingSamRecordIterator implements Iterator { return readerToIteratorMap; } + /** + * Close down all open iterators. + */ + public void close() { + // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue. + for(CloseableIterator iterator: pq) + iterator.close(); + } + /** Returns true if any of the underlying iterators has more records, otherwise false. */ public boolean hasNext() { return !this.pq.isEmpty(); diff --git a/java/src/net/sf/samtools/BAMFileReader2.java b/java/src/net/sf/samtools/BAMFileReader2.java index 157e44645..e2c4b8aed 100644 --- a/java/src/net/sf/samtools/BAMFileReader2.java +++ b/java/src/net/sf/samtools/BAMFileReader2.java @@ -31,10 +31,7 @@ import net.sf.samtools.util.StringLineReader; import net.sf.samtools.SAMFileReader.ValidationStringency; import java.io.*; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.NoSuchElementException; +import java.util.*; import java.net.URL; /** @@ -185,7 +182,7 @@ class BAMFileReader2 } public List getOverlappingBins(final String sequence, final int start, final int end) { - List bins = null; + List bins = Collections.emptyList(); final SAMFileHeader fileHeader = getFileHeader(); int referenceIndex = fileHeader.getSequenceIndex(sequence); @@ -402,6 +399,8 @@ class BAMFileReader2 public SAMRecord next() { final SAMRecord result = mNextRecord; + if(result.getAlignmentStart() <= 11632602 && result.getAlignmentEnd() >= 11632602) + System.out.printf("11632602: %s%n", result.getReadName()); advance(); return result; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 1831b8a08..71c6d595b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -112,8 +112,7 @@ public abstract class LocusView extends LocusIterator implements View { * @return True if another locus context is bounded by this shard. */ protected boolean hasNextLocus() { - GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null; - return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus)); + return nextLocus != null; } /** @@ -122,25 +121,17 @@ public abstract class LocusView extends LocusIterator implements View { * @throw NoSuchElementException if the next element is missing. */ protected AlignmentContext nextLocus() { - GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null; - - if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) ) + if(nextLocus == null) throw new NoSuchElementException("No more elements remain in locus context queue."); // Cache the current and apply filtering. AlignmentContext current = nextLocus; // Find the next. - if( loci.hasNext() ) { - nextLocus = loci.next(); - if( sourceInfo.getDownsampleToCoverage() != null ) - current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); - if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) ) - nextLocus = null; - } - else - nextLocus = null; - + seedNextLocus(); + if( sourceInfo.getDownsampleToCoverage() != null ) + current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() ); + // if the current loci isn't null, get the overflow tracker and pass it to the alignment context if ((this.loci != null)) current.setLocusOverflowTracker(loci.getLocusOverflowTracker()); @@ -152,21 +143,56 @@ public abstract class LocusView extends LocusIterator implements View { */ private void seedNextLocus() { //System.out.printf("loci is %s%n", loci); - if( loci.hasNext() ) - nextLocus = loci.next(); + if( !loci.hasNext() ) { + nextLocus = null; + return; + } + + nextLocus = loci.next(); // If the location of this shard is available, trim the data stream to match the shard. if(!shard.getGenomeLocs().isEmpty()) { - // Iterate past cruft at the beginning to the first locus in the shard. - while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() ) + // Iterate through any elements not contained within this shard. + while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() ) nextLocus = loci.next(); // If nothing in the shard was found, indicate that by setting nextAlignmentContext to null. - if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) ) + if( nextLocus != null && (isBeforeShard(nextLocus.getLocation()) || isAfterShard(nextLocus.getLocation())) ) nextLocus = null; } } + /** + * Is this location before the given shard. + * @param location Location to check. + * @return True if the given location is before the start of the shard. False otherwise. + */ + private boolean isBeforeShard(GenomeLoc location) { + return location.isBefore(shard.getGenomeLocs().get(0)); + } + + /** + * Is this location after the given shard. + * @param location Location to check. + * @return True if the given location is after the end of the shard. False otherwise. + */ + private boolean isAfterShard(GenomeLoc location) { + return location.isPast(shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1)); + } + + /** + * Is this location contained in the given shard. + * @param location Location to check. + * @return True if the given location is contained within the shard. False otherwise. + */ + private boolean isContainedInShard(GenomeLoc location) { + for(GenomeLoc shardLocation: shard.getGenomeLocs()) { + if(shardLocation.containsP(location)) + return true; + } + return false; + } + /** * Class to filter out un-handle-able reads from the stream. We currently are skipping * unmapped reads, non-primary reads, unaligned reads, and duplicate reads. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index 3b37ed328..40ca60d48 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -11,6 +11,7 @@ import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.picard.sam.MergingSamRecordIterator; +import net.sf.picard.filter.FilteringIterator; import java.util.*; import java.io.File; @@ -127,9 +128,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { readerToIteratorMap.put(reader,reader.iterator(chunks)); } - MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true); + // Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set. + MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true); + FilteringIterator filteringIterator = new FilteringIterator(mergingIterator,new IntervalOverlappingFilter(shard.getGenomeLocs())); + return applyDecoratingIterators(enableVerification, - StingSAMIteratorAdapter.adapt(reads,iterator), + StingSAMIteratorAdapter.adapt(reads,filteringIterator), reads.getDownsamplingFraction(), reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), reads.getSupplementalFilters()); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IntervalOverlappingFilter.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IntervalOverlappingFilter.java new file mode 100644 index 000000000..b90f05b4d --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IntervalOverlappingFilter.java @@ -0,0 +1,40 @@ +package org.broadinstitute.sting.gatk.datasources.simpleDataSources; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.List; + +/** + * Filters reads out of a data stream that don't overlap with the given list of locations. + * + * @author mhanna + * @version 0.1 + */ +public class IntervalOverlappingFilter implements SamRecordFilter { + /** + * The list of locations containing reads to keep. + */ + private final List intervals; + + public IntervalOverlappingFilter(List intervals) { + this.intervals = intervals; + } + + /** + * Filter out this record if it doesn't appear in the interval list. + * @param read The read to examine. + * @return True to filter the read out. False otherwise. + */ + public boolean filterOut(SAMRecord read) { + GenomeLoc readLocation = GenomeLocParser.createGenomeLoc(read); + for(GenomeLoc interval: intervals) { + if(interval.overlapsP(readLocation)) + return false; + } + return true; + } + +}