diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index 90e8a2c3a..b36c59a2c 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -42,6 +42,11 @@ public class AllLocusView extends LocusView { */ private AlignmentContext nextLocus = null; + /** + * Signal not to advance the iterator because we're currently sitting at the next element. + */ + private boolean atNextElement = false; + /** * Create a new queue of locus contexts. * @param provider @@ -50,49 +55,80 @@ public class AllLocusView extends LocusView { super( provider ); // Seed the state tracking members with the first possible seek position and the first possible locus context. locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus()); - if( locusIterator.hasNext() ) { - // cache next position and next alignment context - nextPosition = locusIterator.next(); - nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition); - } } public boolean hasNext() { + advance(); return nextPosition != null; } public AlignmentContext next() { + advance(); - GenomeLoc currentPosition = nextPosition; - if( currentPosition == null ) + if(nextPosition == null) throw new NoSuchElementException("No next is available in the all locus view"); - // Crank the iterator to (if possible) or past the next context. - while( nextLocus != null && nextLocus.getLocation().isBefore(currentPosition) && hasNextLocus() ) - nextLocus = nextLocus(); + // Flag to the iterator that no data is waiting in the queue to be processed. + atNextElement = false; - AlignmentContext currentLocus = null; // context we are gonna return + AlignmentContext currentLocus; // If actual data is present, return it. Otherwise, return empty data. 
- if( nextLocus != null && nextLocus.getLocation().equals(currentPosition) ) { - currentLocus = nextLocus; // found alignment context at the current position - nextLocus = hasNextLocus() ? nextLocus() : null; - } + if( nextLocus != null && nextLocus.getLocation().equals(nextPosition) ) + currentLocus = nextLocus; else - currentLocus = createEmptyLocus( currentPosition ); - - // Determine the next locus. The trick is that we may have more than one alignment context at the same - // reference position (regular base pileup, then extended pileup). If next alignment context (that we just pre-read) - // is still at the current position, we do not increment current position and wait for next call to next() to return - // that context. If we know that next context is past the current position, we are done with current - // position - if ( nextLocus == null || ! nextLocus.getLocation().equals(currentPosition) ) - nextPosition = locusIterator.hasNext() ? locusIterator.next() : null; - + currentLocus = createEmptyLocus(nextPosition); return currentLocus; } + private void advance() { + // Already at the next element? Don't move forward. + if(atNextElement) + return; + + // Out of elements? + if(nextPosition == null && !locusIterator.hasNext()) + return; + + // If nextLocus has been consumed, clear it out to make room for the next incoming locus. + if(nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) { + nextLocus = null; + + // Determine the next locus. The trick is that we may have more than one alignment context at the same + // reference position (regular base pileup, then extended pileup). If next alignment context (that we just pre-read) + // is still at the current position, we do not increment current position and wait for next call to next() to return + // that context. 
If we know that next context is past the current position, we are done with current + // position + if(hasNextLocus()) { + nextLocus = nextLocus(); + if(nextPosition.equals(nextLocus.getLocation())) { + atNextElement = true; + return; + } + } + } + + // No elements left in queue? Clear out the position state tracker and return. + if(!locusIterator.hasNext()) { + nextPosition = null; + return; + } + + // Actually fill the next position. + nextPosition = locusIterator.next(); + atNextElement = true; + + // Crank the iterator to (if possible) or past the next context. Be careful not to hold a reference to nextLocus + // while using the hasNextLocus() / nextLocus() machinery; this will cause us to use more memory than is optimal. + while(nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) { + nextLocus = null; + if(!hasNextLocus()) + break; + nextLocus = nextLocus(); + } + } + /** * Creates a blank locus context at the specified location. * @param site Site at which to create the blank locus context. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 84b5a5463..b467a2ab5 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -50,8 +50,9 @@ public abstract class LocusView extends LocusIterator implements View { private LocusIterator loci; /** - * The next locus context from the iterator. This value must always be within - * the shard; if its null, there's nothing for the consumer to look at. + * The next locus context from the iterator. Lazy loaded: if nextLocus is null and advance() doesn't + * populate it, the iterator is exhausted. If populated, this is the value that should be returned by + * next(). 
*/ private AlignmentContext nextLocus = null; @@ -62,7 +63,7 @@ public abstract class LocusView extends LocusIterator implements View { this.genomeLocParser = provider.getGenomeLocParser(); this.loci = provider.getLocusIterator(); - seedNextLocus(); + advance(); provider.register(this); } @@ -113,6 +114,7 @@ public abstract class LocusView extends LocusIterator implements View { * @return True if another locus context is bounded by this shard. */ protected boolean hasNextLocus() { + advance(); return nextLocus != null; } @@ -122,16 +124,17 @@ public abstract class LocusView extends LocusIterator implements View { * @throw NoSuchElementException if the next element is missing. */ protected AlignmentContext nextLocus() { + advance(); if(nextLocus == null) throw new NoSuchElementException("No more elements remain in locus context queue."); // Cache the current and apply filtering. AlignmentContext current = nextLocus; - - // Find the next. - seedNextLocus(); if( sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null ) current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage ); + + // Indicate that the next operation will need to advance. + nextLocus = null; return current; } @@ -139,7 +142,11 @@ public abstract class LocusView extends LocusIterator implements View { /** * Seed the nextLocus variable with the contents of the next locus (if one exists). */ - private void seedNextLocus() { + private void advance() { + // Already an unclaimed locus present + if(nextLocus != null) + return; + //System.out.printf("loci is %s%n", loci); if( !loci.hasNext() ) { nextLocus = null; @@ -155,7 +162,7 @@ public abstract class LocusView extends LocusIterator implements View { while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() ) nextLocus = loci.next(); - // If nothing in the shard was found, indicate that by setting nextAlignmentContext to null. 
+ // If nothing in the shard was found, indicate that by setting nextLocus to null. if( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) ) nextLocus = null; } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 48bb9bea9..5c341bb02 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -34,7 +34,7 @@ public class WindowMaker implements Iterable, I /** * The data source for reads. Will probably come directly from the BAM file. */ - private final PeekableIterator sourceIterator; + private final Iterator sourceIterator; /** * Stores the sequence of intervals that the windowmaker should be tracking. @@ -46,6 +46,14 @@ public class WindowMaker implements Iterable, I */ private boolean shardGenerated = false; + /** + * The alignment context to return from this shard's iterator. Lazy implementation: the iterator will not find the + * currentAlignmentContext until absolutely required to do so. If currentAlignmentContext is null and advance() + * doesn't populate it, no more elements are available. If currentAlignmentContext is non-null, currentAlignmentContext + * should be returned by next(). + */ + private AlignmentContext currentAlignmentContext; + /** * Create a new window maker with the given iterator as a data source, covering * the given intervals. @@ -58,10 +66,7 @@ public class WindowMaker implements Iterable, I this.sourceInfo = shard.getReadProperties(); this.readIterator = iterator; - LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData); - - - this.sourceIterator = new PeekableIterator(locusIterator); + this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData); this.intervalIterator = intervals.size()>0 ? 
new PeekableIterator(intervals.iterator()) : null; } @@ -92,9 +97,14 @@ public class WindowMaker implements Iterable, I */ private final GenomeLoc locus; + /** + * Signal not to advance the iterator because we're currently sitting at the next element. + */ + private boolean atNextElement = false; + public WindowMakerIterator(GenomeLoc locus) { this.locus = locus; - seedNextLocus(); + advance(); } public ReadProperties getSourceInfo() { @@ -110,19 +120,60 @@ public class WindowMaker implements Iterable, I } public boolean hasNext() { - // locus == null when doing monolithic sharding. - return sourceIterator.hasNext() && sourceIterator.peek().getLocation().overlapsP(locus); + advance(); + return atNextElement; } public AlignmentContext next() { - if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval."); - return sourceIterator.next(); + advance(); + if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval."); + + // Prepare object state for no next element. + AlignmentContext toReturn = currentAlignmentContext; + currentAlignmentContext = null; + atNextElement = false; + + // Return the current element. + return toReturn; } - public void seedNextLocus() { - // locus == null when doing monolithic sharding. - while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus)) - sourceIterator.next(); + private void advance() { + // No shard boundaries specified. If currentAlignmentContext has been consumed, grab the next one. + if(locus == null) { + if(!atNextElement && sourceIterator.hasNext()) { + currentAlignmentContext = sourceIterator.next(); + atNextElement = true; + } + return; + } + + // Can't possibly find another element. Skip out early. + if(currentAlignmentContext == null && !sourceIterator.hasNext()) + return; + + // Need to find the next element that is not past shard boundaries. 
If we travel past the edge of
+            // shard boundaries, stop and let the next interval pick it up.
+            while(true) {
+                // Seed the candidate if none is held; a held candidate is always tested, even once the source is exhausted.
+                if(currentAlignmentContext == null) {
+                    if(!sourceIterator.hasNext())
+                        break;
+                    currentAlignmentContext = sourceIterator.next();
+                }
+                // Found a match.
+                if(locus.containsP(currentAlignmentContext.getLocation())) {
+                    atNextElement = true;
+                    break;
+                }
+                // Whoops. Skipped past the end of the region. Keep the candidate so the next interval can pick it up.
+                if(locus.isBefore(currentAlignmentContext.getLocation()))
+                    break;
+                // Candidate is neither inside nor beyond this window; discard it and fetch another on the next pass.
+                currentAlignmentContext = null;
+            }
+        }
     }
 }