2010-02-25 08:16:50 +08:00
|
|
|
package org.broadinstitute.sting.gatk.executive;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2010-05-31 02:00:12 +08:00
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
2010-05-19 13:40:05 +08:00
|
|
|
import org.broadinstitute.sting.gatk.iterators.*;
|
2010-02-25 08:16:50 +08:00
|
|
|
import org.broadinstitute.sting.gatk.Reads;
|
2010-05-19 13:40:05 +08:00
|
|
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
2010-05-27 06:12:25 +08:00
|
|
|
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
2010-03-12 02:40:31 +08:00
|
|
|
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
2010-05-27 06:12:25 +08:00
|
|
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
2010-03-12 02:40:31 +08:00
|
|
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
2010-02-25 08:16:50 +08:00
|
|
|
|
2010-03-09 20:36:11 +08:00
|
|
|
import java.util.*;
|
2010-02-25 08:16:50 +08:00
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import net.sf.picard.util.PeekableIterator;
|
2010-03-12 02:40:31 +08:00
|
|
|
import net.sf.picard.filter.FilteringIterator;
|
|
|
|
|
import net.sf.picard.filter.SamRecordFilter;
|
2010-02-25 08:16:50 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Buffer shards of data which may or may not contain multiple loci into
|
|
|
|
|
* iterators of all data which cover an interval. Its existence is an homage
|
|
|
|
|
* to Mark's stillborn WindowMaker, RIP 2009.
|
|
|
|
|
*
|
|
|
|
|
* @author mhanna
|
|
|
|
|
* @version 0.1
|
|
|
|
|
*/
|
|
|
|
|
public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, Iterator<WindowMaker.WindowMakerIterator> {
|
2010-03-09 20:36:11 +08:00
|
|
|
/**
|
|
|
|
|
* Source information for iteration.
|
|
|
|
|
*/
|
|
|
|
|
private final Reads sourceInfo;
|
|
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
/**
|
|
|
|
|
* Hold the read iterator so that it can be closed later.
|
|
|
|
|
*/
|
|
|
|
|
private final StingSAMIterator readIterator;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The locus overflow tracker.
|
|
|
|
|
*/
|
|
|
|
|
private final LocusOverflowTracker locusOverflowTracker;
|
|
|
|
|
|
2010-02-25 08:16:50 +08:00
|
|
|
/**
|
|
|
|
|
* The data source for reads. Will probably come directly from the BAM file.
|
|
|
|
|
*/
|
2010-03-12 02:40:31 +08:00
|
|
|
private final PeekableIterator<AlignmentContext> sourceIterator;
|
2010-02-25 08:16:50 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Stores the sequence of intervals that the windowmaker should be tracking.
|
|
|
|
|
*/
|
|
|
|
|
private final PeekableIterator<GenomeLoc> intervalIterator;
|
|
|
|
|
|
|
|
|
|
/**
|
2010-03-12 02:40:31 +08:00
|
|
|
* In the case of monolithic sharding, this case returns whether the only shard has been generated.
|
2010-02-25 08:16:50 +08:00
|
|
|
*/
|
2010-03-12 02:40:31 +08:00
|
|
|
private boolean shardGenerated = false;
|
2010-02-25 08:16:50 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a new window maker with the given iterator as a data source, covering
|
2010-05-19 13:40:05 +08:00
|
|
|
* the given intervals.
|
2010-02-25 08:16:50 +08:00
|
|
|
* @param iterator The data source for this window.
|
|
|
|
|
* @param intervals The set of intervals over which to traverse.
|
|
|
|
|
*/
|
2010-06-03 06:26:32 +08:00
|
|
|
public WindowMaker(StingSAMIterator iterator, List<GenomeLoc> intervals, List<SamRecordFilter> filters, List<LocusIteratorFilter> discards ) {
|
2010-03-09 20:36:11 +08:00
|
|
|
this.sourceInfo = iterator.getSourceInfo();
|
2010-03-12 02:40:31 +08:00
|
|
|
this.readIterator = iterator;
|
2010-05-19 13:40:05 +08:00
|
|
|
|
|
|
|
|
LocusIterator locusIterator;
|
2010-05-27 06:12:25 +08:00
|
|
|
Iterator<SAMRecord> wrappedIterator = TraversalEngine.addMandatoryFilteringIterators(iterator, filters);
|
2010-05-19 13:40:05 +08:00
|
|
|
if(sourceInfo.getDownsamplingMethod() != null &&
|
2010-06-18 21:54:27 +08:00
|
|
|
(sourceInfo.getDownsamplingMethod().type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)) {
|
2010-05-31 02:00:12 +08:00
|
|
|
if ( discards.size() > 0 )
|
|
|
|
|
throw new StingException("Experimental downsampling iterator doesn't support base discarding at this point; complain to Matt Hanna");
|
2010-05-27 06:12:25 +08:00
|
|
|
locusIterator = new DownsamplingLocusIteratorByState(wrappedIterator,sourceInfo);
|
2010-05-31 02:00:12 +08:00
|
|
|
} else
|
|
|
|
|
locusIterator = new LocusIteratorByState(wrappedIterator,sourceInfo, discards);
|
2010-05-19 13:40:05 +08:00
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
this.locusOverflowTracker = locusIterator.getLocusOverflowTracker();
|
|
|
|
|
|
|
|
|
|
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
|
|
|
|
|
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public Iterator<WindowMakerIterator> iterator() {
|
|
|
|
|
return this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean hasNext() {
|
2010-03-12 02:40:31 +08:00
|
|
|
return (intervalIterator != null && intervalIterator.hasNext()) || !shardGenerated;
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public WindowMakerIterator next() {
|
2010-03-12 02:40:31 +08:00
|
|
|
shardGenerated = true;
|
|
|
|
|
return new WindowMakerIterator(intervalIterator != null ? intervalIterator.next() : null);
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void remove() {
|
|
|
|
|
throw new UnsupportedOperationException("Cannot remove from a window maker.");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void close() {
|
2010-03-12 02:40:31 +08:00
|
|
|
this.readIterator.close();
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
public class WindowMakerIterator extends LocusIterator {
|
2010-02-25 08:16:50 +08:00
|
|
|
/**
|
|
|
|
|
* The locus for which this iterator is currently returning reads.
|
|
|
|
|
*/
|
|
|
|
|
private final GenomeLoc locus;
|
|
|
|
|
|
|
|
|
|
public WindowMakerIterator(GenomeLoc locus) {
|
|
|
|
|
this.locus = locus;
|
2010-03-12 02:40:31 +08:00
|
|
|
seedNextLocus();
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public Reads getSourceInfo() {
|
2010-03-09 20:36:11 +08:00
|
|
|
return sourceInfo;
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public GenomeLoc getLocus() {
|
|
|
|
|
return locus;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public WindowMakerIterator iterator() {
|
|
|
|
|
return this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean hasNext() {
|
2010-03-12 02:40:31 +08:00
|
|
|
// locus == null when doing monolithic sharding.
|
|
|
|
|
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
|
|
|
|
|
return sourceIterator.hasNext() && (locus == null || sourceIterator.peek().getLocation().overlapsP(locus));
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
public AlignmentContext next() {
|
2010-03-09 20:36:11 +08:00
|
|
|
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
|
2010-03-12 02:40:31 +08:00
|
|
|
return sourceIterator.next();
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
public LocusOverflowTracker getLocusOverflowTracker() {
|
|
|
|
|
return locusOverflowTracker;
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 02:40:31 +08:00
|
|
|
public void seedNextLocus() {
|
|
|
|
|
// locus == null when doing monolithic sharding.
|
|
|
|
|
// TODO: Move the monolithic sharding iterator so that we don't have to special case here.
|
|
|
|
|
if(locus == null) return;
|
|
|
|
|
|
|
|
|
|
while(sourceIterator.hasNext() && sourceIterator.peek().getLocation().isBefore(locus))
|
|
|
|
|
sourceIterator.next();
|
|
|
|
|
}
|
|
|
|
|
}
|
2010-02-25 08:16:50 +08:00
|
|
|
}
|