Fix for some correctness bugs found during early performance testing, phase 1.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2822 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
60f05379a7
commit
dc885ba386
|
|
@ -36,7 +36,7 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
||||
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
|
||||
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||
private final SamFileHeaderMerger samHeaderMerger;
|
||||
private final SAMFileHeader.SortOrder sortOrder;
|
||||
|
|
@ -44,7 +44,7 @@ public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
|||
/**
|
||||
* Maps iterators back to the readers from which they are derived.
|
||||
*/
|
||||
private final Map<Iterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<Iterator<SAMRecord>,SAMFileReader>();
|
||||
private final Map<CloseableIterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<CloseableIterator<SAMRecord>,SAMFileReader>();
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
|
|
@ -94,6 +94,15 @@ public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
|||
return readerToIteratorMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close down all open iterators.
|
||||
*/
|
||||
public void close() {
|
||||
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
|
||||
for(CloseableIterator<SAMRecord> iterator: pq)
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||
public boolean hasNext() {
|
||||
return !this.pq.isEmpty();
|
||||
|
|
|
|||
|
|
@ -31,10 +31,7 @@ import net.sf.samtools.util.StringLineReader;
|
|||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.*;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
|
|
@ -185,7 +182,7 @@ class BAMFileReader2
|
|||
}
|
||||
|
||||
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
||||
List<Bin> bins = null;
|
||||
List<Bin> bins = Collections.emptyList();
|
||||
|
||||
final SAMFileHeader fileHeader = getFileHeader();
|
||||
int referenceIndex = fileHeader.getSequenceIndex(sequence);
|
||||
|
|
@ -402,6 +399,8 @@ class BAMFileReader2
|
|||
|
||||
public SAMRecord next() {
|
||||
final SAMRecord result = mNextRecord;
|
||||
if(result.getAlignmentStart() <= 11632602 && result.getAlignmentEnd() >= 11632602)
|
||||
System.out.printf("11632602: %s%n", result.getReadName());
|
||||
advance();
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -112,8 +112,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
* @return True if another locus context is bounded by this shard.
|
||||
*/
|
||||
protected boolean hasNextLocus() {
|
||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
||||
return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus));
|
||||
return nextLocus != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -122,25 +121,17 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
* @throw NoSuchElementException if the next element is missing.
|
||||
*/
|
||||
protected AlignmentContext nextLocus() {
|
||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
||||
|
||||
if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) )
|
||||
if(nextLocus == null)
|
||||
throw new NoSuchElementException("No more elements remain in locus context queue.");
|
||||
|
||||
// Cache the current and apply filtering.
|
||||
AlignmentContext current = nextLocus;
|
||||
|
||||
// Find the next.
|
||||
if( loci.hasNext() ) {
|
||||
nextLocus = loci.next();
|
||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||
if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) )
|
||||
nextLocus = null;
|
||||
}
|
||||
else
|
||||
nextLocus = null;
|
||||
|
||||
seedNextLocus();
|
||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||
|
||||
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||
if ((this.loci != null))
|
||||
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
|
||||
|
|
@ -152,21 +143,56 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
*/
|
||||
private void seedNextLocus() {
|
||||
//System.out.printf("loci is %s%n", loci);
|
||||
if( loci.hasNext() )
|
||||
nextLocus = loci.next();
|
||||
if( !loci.hasNext() ) {
|
||||
nextLocus = null;
|
||||
return;
|
||||
}
|
||||
|
||||
nextLocus = loci.next();
|
||||
|
||||
// If the location of this shard is available, trim the data stream to match the shard.
|
||||
if(!shard.getGenomeLocs().isEmpty()) {
|
||||
// Iterate past cruft at the beginning to the first locus in the shard.
|
||||
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() )
|
||||
// Iterate through any elements not contained within this shard.
|
||||
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
|
||||
nextLocus = loci.next();
|
||||
|
||||
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
|
||||
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) )
|
||||
if( nextLocus != null && (isBeforeShard(nextLocus.getLocation()) || isAfterShard(nextLocus.getLocation())) )
|
||||
nextLocus = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this location before the given shard.
|
||||
* @param location Location to check.
|
||||
* @return True if the given location is before the start of the shard. False otherwise.
|
||||
*/
|
||||
private boolean isBeforeShard(GenomeLoc location) {
|
||||
return location.isBefore(shard.getGenomeLocs().get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this location after the given shard.
|
||||
* @param location Location to check.
|
||||
* @return True if the given location is after the end of the shard. False otherwise.
|
||||
*/
|
||||
private boolean isAfterShard(GenomeLoc location) {
|
||||
return location.isPast(shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this location contained in the given shard.
|
||||
* @param location Location to check.
|
||||
* @return True if the given location is contained within the shard. False otherwise.
|
||||
*/
|
||||
private boolean isContainedInShard(GenomeLoc location) {
|
||||
for(GenomeLoc shardLocation: shard.getGenomeLocs()) {
|
||||
if(shardLocation.containsP(location))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
||||
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import net.sf.samtools.*;
|
|||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
|
@ -127,9 +128,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
readerToIteratorMap.put(reader,reader.iterator(chunks));
|
||||
}
|
||||
|
||||
MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||
FilteringIterator filteringIterator = new FilteringIterator(mergingIterator,new IntervalOverlappingFilter(shard.getGenomeLocs()));
|
||||
|
||||
return applyDecoratingIterators(enableVerification,
|
||||
StingSAMIteratorAdapter.adapt(reads,iterator),
|
||||
StingSAMIteratorAdapter.adapt(reads,filteringIterator),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Filters reads out of a data stream that don't overlap with the given list of locations.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class IntervalOverlappingFilter implements SamRecordFilter {
|
||||
/**
|
||||
* The list of locations containing reads to keep.
|
||||
*/
|
||||
private final List<GenomeLoc> intervals;
|
||||
|
||||
public IntervalOverlappingFilter(List<GenomeLoc> intervals) {
|
||||
this.intervals = intervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter out this record if it doesn't appear in the interval list.
|
||||
* @param read The read to examine.
|
||||
* @return True to filter the read out. False otherwise.
|
||||
*/
|
||||
public boolean filterOut(SAMRecord read) {
|
||||
GenomeLoc readLocation = GenomeLocParser.createGenomeLoc(read);
|
||||
for(GenomeLoc interval: intervals) {
|
||||
if(interval.overlapsP(readLocation))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue