Locus iterators were implemented in a peekable style, which meant that a locus

and its three or four nearest neighbors could be in memory at once.  The
iterators have been tweaked so that previous AlignmentContexts are no longer
held by strong references, which means that the garbage collector can work
effectively to help us trundle through these regions.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5820 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-05-18 21:40:40 +00:00
parent a38b2be329
commit 0bb6b9a91a
3 changed files with 141 additions and 47 deletions

View File

@ -42,6 +42,11 @@ public class AllLocusView extends LocusView {
*/
private AlignmentContext nextLocus = null;
/**
* Signal not to advance the iterator because we're currently sitting at the next element.
*/
private boolean atNextElement = false;
/**
* Create a new queue of locus contexts.
* @param provider
@ -50,49 +55,80 @@ public class AllLocusView extends LocusView {
super( provider );
// Seed the state tracking members with the first possible seek position and the first possible locus context.
locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus());
if( locusIterator.hasNext() ) {
// cache next position and next alignment context
nextPosition = locusIterator.next();
nextLocus = hasNextLocus() ? nextLocus() : createEmptyLocus(nextPosition);
}
}
/**
 * Reports whether this view can supply another position.
 * @return True if a position is available after advancing, false otherwise.
 */
public boolean hasNext() {
    // Move to the next position first; advance() is what clears nextPosition once nothing remains.
    advance();
    final boolean positionAvailable = (nextPosition != null);
    return positionAvailable;
}
public AlignmentContext next() {
advance();
GenomeLoc currentPosition = nextPosition;
if( currentPosition == null )
if(nextPosition == null)
throw new NoSuchElementException("No next is available in the all locus view");
// Crank the iterator to (if possible) or past the next context.
while( nextLocus != null && nextLocus.getLocation().isBefore(currentPosition) && hasNextLocus() )
nextLocus = nextLocus();
// Flag to the iterator that no data is waiting in the queue to be processed.
atNextElement = false;
AlignmentContext currentLocus = null; // context we are gonna return
AlignmentContext currentLocus;
// If actual data is present, return it. Otherwise, return empty data.
if( nextLocus != null && nextLocus.getLocation().equals(currentPosition) ) {
currentLocus = nextLocus; // found alignment context at the current position
nextLocus = hasNextLocus() ? nextLocus() : null;
}
if( nextLocus != null && nextLocus.getLocation().equals(nextPosition) )
currentLocus = nextLocus;
else
currentLocus = createEmptyLocus( currentPosition );
// Determine the next locus. The trick is that we may have more than one alignment context at the same
// reference position (regular base pileup, then extended pileup). If next alignment context (that we just pre-read)
// is still at the current position, we do not increment current position and wait for next call to next() to return
// that context. If we know that next context is past the current position, we are done with current
// position
if ( nextLocus == null || ! nextLocus.getLocation().equals(currentPosition) )
nextPosition = locusIterator.hasNext() ? locusIterator.next() : null;
currentLocus = createEmptyLocus(nextPosition);
return currentLocus;
}
/**
 * Lazily moves the view's state (nextPosition / nextLocus / atNextElement) forward to the next
 * element to be handed out. Does nothing if an element is already pending (atNextElement is true)
 * or if there is neither a current position nor any position left in locusIterator. When
 * locusIterator runs out, nextPosition is set to null.
 */
private void advance() {
// Already at the next element? Don't move forward.
if(atNextElement)
return;
// Out of elements?
if(nextPosition == null && !locusIterator.hasNext())
return;
// If nextLocus has been consumed, clear it out to make room for the next incoming locus.
// "Consumed" here means nextLocus's location is not past nextPosition.
// NOTE(review): GenomeLoc.isPast() is not visible here; presumably it means "strictly after" -- confirm.
if(nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) {
// Drop the reference before asking for the next locus, so the old context is not kept alive by this field.
nextLocus = null;
// Determine the next locus. The trick is that we may have more than one alignment context at the same
// reference position (regular base pileup, then extended pileup). If next alignment context (that we just pre-read)
// is still at the current position, we do not increment current position and wait for next call to next() to return
// that context. If we know that next context is past the current position, we are done with current
// position
if(hasNextLocus()) {
nextLocus = nextLocus();
// Same reference position again: stay on nextPosition and mark the element as pending.
if(nextPosition.equals(nextLocus.getLocation())) {
atNextElement = true;
return;
}
}
}
// No elements left in queue? Clear out the position state tracker and return.
if(!locusIterator.hasNext()) {
nextPosition = null;
return;
}
// Actually fill the next position.
nextPosition = locusIterator.next();
// A position is now pending; further calls to advance() return immediately until this flag is cleared.
atNextElement = true;
// Crank the iterator to (if possible) or past the next context. Be careful not to hold a reference to nextLocus
// while using the hasNextLocus() / nextLocus() machinery; this will cause us to use more memory than is optimal.
while(nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) {
// Release the stale context before pulling the next one.
nextLocus = null;
// No more contexts: leave nextLocus null.
if(!hasNextLocus())
break;
nextLocus = nextLocus();
}
}
/**
* Creates a blank locus context at the specified location.
* @param site Site at which to create the blank locus context.

View File

@ -50,8 +50,9 @@ public abstract class LocusView extends LocusIterator implements View {
private LocusIterator loci;
/**
* The next locus context from the iterator. This value must always be within
* the shard; if its null, there's nothing for the consumer to look at.
* The next locus context from the iterator. Lazy loaded: if nextLocus is null and advance() doesn't
* populate it, the iterator is exhausted. If populated, this is the value that should be returned by
* next().
*/
private AlignmentContext nextLocus = null;
@ -62,7 +63,7 @@ public abstract class LocusView extends LocusIterator implements View {
this.genomeLocParser = provider.getGenomeLocParser();
this.loci = provider.getLocusIterator();
seedNextLocus();
advance();
provider.register(this);
}
@ -113,6 +114,7 @@ public abstract class LocusView extends LocusIterator implements View {
* @return True if another locus context is bounded by this shard.
*/
protected boolean hasNextLocus() {
    // Give advance() the chance to populate nextLocus before testing it.
    advance();
    final boolean locusAvailable = (nextLocus != null);
    return locusAvailable;
}
@ -122,16 +124,17 @@ public abstract class LocusView extends LocusIterator implements View {
* @throw NoSuchElementException if the next element is missing.
*/
protected AlignmentContext nextLocus() {
advance();
if(nextLocus == null)
throw new NoSuchElementException("No more elements remain in locus context queue.");
// Cache the current and apply filtering.
AlignmentContext current = nextLocus;
// Find the next.
seedNextLocus();
if( sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null )
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
// Indicate that the next operation will need to advance.
nextLocus = null;
return current;
}
@ -139,7 +142,11 @@ public abstract class LocusView extends LocusIterator implements View {
/**
* Seed the nextLocus variable with the contents of the next locus (if one exists).
*/
private void seedNextLocus() {
private void advance() {
// Already an unclaimed locus present
if(nextLocus != null)
return;
//System.out.printf("loci is %s%n", loci);
if( !loci.hasNext() ) {
nextLocus = null;
@ -155,7 +162,7 @@ public abstract class LocusView extends LocusIterator implements View {
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
nextLocus = loci.next();
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
// If nothing in the shard was found, indicate that by setting nextLocus to null.
if( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) )
nextLocus = null;
}

View File

@ -34,7 +34,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
/**
* The data source for reads. Will probably come directly from the BAM file.
*/
private final PeekableIterator<AlignmentContext> sourceIterator;
private final Iterator<AlignmentContext> sourceIterator;
/**
* Stores the sequence of intervals that the windowmaker should be tracking.
@ -46,6 +46,14 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private boolean shardGenerated = false;
/**
* The alignment context to return from this shard's iterator. Lazy implementation: the iterator will not find the
* currentAlignmentContext until absolutely required to do so. If currentAlignmentContext is null and advance()
* doesn't populate it, no more elements are available. If currentAlignmentContext is non-null, currentAlignmentContext
* should be returned by next().
*/
private AlignmentContext currentAlignmentContext;
/**
* Create a new window maker with the given iterator as a data source, covering
* the given intervals.
@ -58,10 +66,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData);
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData);
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
@ -92,9 +97,14 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private final GenomeLoc locus;
/**
* Signal not to advance the iterator because we're currently sitting at the next element.
*/
private boolean atNextElement = false;
public WindowMakerIterator(GenomeLoc locus) {
this.locus = locus;
seedNextLocus();
advance();
}
public ReadProperties getSourceInfo() {
@ -110,19 +120,60 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
}
public boolean hasNext() {
// locus == null when doing monolithic sharding.
return sourceIterator.hasNext() && sourceIterator.peek().getLocation().overlapsP(locus);
advance();
return atNextElement;
}
public AlignmentContext next() {
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
return sourceIterator.next();
advance();
if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
// Prepare object state for no next element.
AlignmentContext toReturn = currentAlignmentContext;
currentAlignmentContext = null;
atNextElement = false;
// Return the current element.
return toReturn;
}
public void seedNextLocus() {
// locus == null when doing monolithic sharding.
while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus))
sourceIterator.next();
/**
 * Lazily positions this window's iterator on the next alignment context to return.
 *
 * With no window (locus == null) every element of sourceIterator is accepted. Otherwise elements
 * are pulled until one is contained in locus (atNextElement becomes true) or one lies beyond locus
 * (it is left in currentAlignmentContext so that the next interval can pick it up).
 *
 * Unlike a loop guarded by sourceIterator.hasNext(), the element currently held in
 * currentAlignmentContext is always tested against the window, even when the source has just been
 * exhausted; otherwise the final alignment context could be silently dropped.
 */
private void advance() {
    // No shard boundaries specified.  If currentAlignmentContext has been consumed, grab the next one.
    if(locus == null) {
        if(!atNextElement && sourceIterator.hasNext()) {
            currentAlignmentContext = sourceIterator.next();
            atNextElement = true;
        }
        return;
    }

    // Need to find the next element that is not past shard boundaries.  If we travel past the edge of
    // shard boundaries, stop and let the next interval pick it up.
    while(true) {
        // Nothing held: fetch a candidate, or give up if the source is exhausted.
        if(currentAlignmentContext == null) {
            if(!sourceIterator.hasNext())
                return;
            currentAlignmentContext = sourceIterator.next();
        }

        // Found a match.
        if(locus.containsP(currentAlignmentContext.getLocation())) {
            atNextElement = true;
            return;
        }

        // Whoops.  Skipped past the end of the region.  Iteration for this window is complete; keep the
        // candidate so that the next window can examine it.
        if(locus.isBefore(currentAlignmentContext.getLocation()))
            return;

        // Candidate is neither inside nor beyond the window: discard it (dropping the reference) and try again.
        currentAlignmentContext = null;
    }
}
}
}