Fix for some correctness bugs found during early performance testing, phase 1.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2822 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
60f05379a7
commit
dc885ba386
|
|
@ -36,7 +36,7 @@ import net.sf.samtools.util.CloseableIterator;
|
||||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||||
* the requested output format is unsorted, in which case any combination is valid.
|
* the requested output format is unsorted, in which case any combination is valid.
|
||||||
*/
|
*/
|
||||||
public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
|
||||||
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||||
private final SamFileHeaderMerger samHeaderMerger;
|
private final SamFileHeaderMerger samHeaderMerger;
|
||||||
private final SAMFileHeader.SortOrder sortOrder;
|
private final SAMFileHeader.SortOrder sortOrder;
|
||||||
|
|
@ -44,7 +44,7 @@ public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
||||||
/**
|
/**
|
||||||
* Maps iterators back to the readers from which they are derived.
|
* Maps iterators back to the readers from which they are derived.
|
||||||
*/
|
*/
|
||||||
private final Map<Iterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<Iterator<SAMRecord>,SAMFileReader>();
|
private final Map<CloseableIterator<SAMRecord>,SAMFileReader> iteratorToSourceMap = new HashMap<CloseableIterator<SAMRecord>,SAMFileReader>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||||
|
|
@ -94,6 +94,15 @@ public class MergingSamRecordIterator implements Iterator<SAMRecord> {
|
||||||
return readerToIteratorMap;
|
return readerToIteratorMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close down all open iterators.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
|
||||||
|
for(CloseableIterator<SAMRecord> iterator: pq)
|
||||||
|
iterator.close();
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return !this.pq.isEmpty();
|
return !this.pq.isEmpty();
|
||||||
|
|
|
||||||
|
|
@ -31,10 +31,7 @@ import net.sf.samtools.util.StringLineReader;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -185,7 +182,7 @@ class BAMFileReader2
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
public List<Bin> getOverlappingBins(final String sequence, final int start, final int end) {
|
||||||
List<Bin> bins = null;
|
List<Bin> bins = Collections.emptyList();
|
||||||
|
|
||||||
final SAMFileHeader fileHeader = getFileHeader();
|
final SAMFileHeader fileHeader = getFileHeader();
|
||||||
int referenceIndex = fileHeader.getSequenceIndex(sequence);
|
int referenceIndex = fileHeader.getSequenceIndex(sequence);
|
||||||
|
|
@ -402,6 +399,8 @@ class BAMFileReader2
|
||||||
|
|
||||||
public SAMRecord next() {
|
public SAMRecord next() {
|
||||||
final SAMRecord result = mNextRecord;
|
final SAMRecord result = mNextRecord;
|
||||||
|
if(result.getAlignmentStart() <= 11632602 && result.getAlignmentEnd() >= 11632602)
|
||||||
|
System.out.printf("11632602: %s%n", result.getReadName());
|
||||||
advance();
|
advance();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,7 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
* @return True if another locus context is bounded by this shard.
|
* @return True if another locus context is bounded by this shard.
|
||||||
*/
|
*/
|
||||||
protected boolean hasNextLocus() {
|
protected boolean hasNextLocus() {
|
||||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
return nextLocus != null;
|
||||||
return nextLocus != null && (lastLocus == null || !nextLocus.getLocation().isPast(lastLocus));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -122,25 +121,17 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
* @throws NoSuchElementException if the next element is missing.
|
* @throws NoSuchElementException if the next element is missing.
|
||||||
*/
|
*/
|
||||||
protected AlignmentContext nextLocus() {
|
protected AlignmentContext nextLocus() {
|
||||||
GenomeLoc lastLocus = !shard.getGenomeLocs().isEmpty() ? shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1) : null;
|
if(nextLocus == null)
|
||||||
|
|
||||||
if( nextLocus == null || (lastLocus != null && nextLocus.getLocation().isPast(lastLocus)) )
|
|
||||||
throw new NoSuchElementException("No more elements remain in locus context queue.");
|
throw new NoSuchElementException("No more elements remain in locus context queue.");
|
||||||
|
|
||||||
// Cache the current and apply filtering.
|
// Cache the current and apply filtering.
|
||||||
AlignmentContext current = nextLocus;
|
AlignmentContext current = nextLocus;
|
||||||
|
|
||||||
// Find the next.
|
// Find the next.
|
||||||
if( loci.hasNext() ) {
|
seedNextLocus();
|
||||||
nextLocus = loci.next();
|
if( sourceInfo.getDownsampleToCoverage() != null )
|
||||||
if( sourceInfo.getDownsampleToCoverage() != null )
|
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
||||||
current.downsampleToCoverage( sourceInfo.getDownsampleToCoverage() );
|
|
||||||
if( lastLocus != null && nextLocus.getLocation().isPast(lastLocus) )
|
|
||||||
nextLocus = null;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nextLocus = null;
|
|
||||||
|
|
||||||
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
// if the current loci isn't null, get the overflow tracker and pass it to the alignment context
|
||||||
if ((this.loci != null))
|
if ((this.loci != null))
|
||||||
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
|
current.setLocusOverflowTracker(loci.getLocusOverflowTracker());
|
||||||
|
|
@ -152,21 +143,56 @@ public abstract class LocusView extends LocusIterator implements View {
|
||||||
*/
|
*/
|
||||||
private void seedNextLocus() {
|
private void seedNextLocus() {
|
||||||
//System.out.printf("loci is %s%n", loci);
|
//System.out.printf("loci is %s%n", loci);
|
||||||
if( loci.hasNext() )
|
if( !loci.hasNext() ) {
|
||||||
nextLocus = loci.next();
|
nextLocus = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
nextLocus = loci.next();
|
||||||
|
|
||||||
// If the location of this shard is available, trim the data stream to match the shard.
|
// If the location of this shard is available, trim the data stream to match the shard.
|
||||||
if(!shard.getGenomeLocs().isEmpty()) {
|
if(!shard.getGenomeLocs().isEmpty()) {
|
||||||
// Iterate past cruft at the beginning to the first locus in the shard.
|
// Iterate through any elements not contained within this shard.
|
||||||
while( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) && loci.hasNext() )
|
while( nextLocus != null && !isContainedInShard(nextLocus.getLocation()) && loci.hasNext() )
|
||||||
nextLocus = loci.next();
|
nextLocus = loci.next();
|
||||||
|
|
||||||
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
|
// If nothing in the shard was found, indicate that by setting nextAlignmentContext to null.
|
||||||
if( nextLocus != null && nextLocus.getLocation().isBefore(shard.getGenomeLocs().get(0)) )
|
if( nextLocus != null && (isBeforeShard(nextLocus.getLocation()) || isAfterShard(nextLocus.getLocation())) )
|
||||||
nextLocus = null;
|
nextLocus = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this location before the given shard.
|
||||||
|
* @param location Location to check.
|
||||||
|
* @return True if the given location is before the start of the shard. False otherwise.
|
||||||
|
*/
|
||||||
|
private boolean isBeforeShard(GenomeLoc location) {
|
||||||
|
return location.isBefore(shard.getGenomeLocs().get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this location after the given shard.
|
||||||
|
* @param location Location to check.
|
||||||
|
* @return True if the given location is after the end of the shard. False otherwise.
|
||||||
|
*/
|
||||||
|
private boolean isAfterShard(GenomeLoc location) {
|
||||||
|
return location.isPast(shard.getGenomeLocs().get(shard.getGenomeLocs().size()-1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this location contained in the given shard.
|
||||||
|
* @param location Location to check.
|
||||||
|
* @return True if the given location is contained within the shard. False otherwise.
|
||||||
|
*/
|
||||||
|
private boolean isContainedInShard(GenomeLoc location) {
|
||||||
|
for(GenomeLoc shardLocation: shard.getGenomeLocs()) {
|
||||||
|
if(shardLocation.containsP(location))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
* Class to filter out un-handle-able reads from the stream. We currently are skipping
|
||||||
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
* unmapped reads, non-primary reads, unaligned reads, and duplicate reads.
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import net.sf.samtools.*;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||||
|
import net.sf.picard.filter.FilteringIterator;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -127,9 +128,12 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||||
readerToIteratorMap.put(reader,reader.iterator(chunks));
|
readerToIteratorMap.put(reader,reader.iterator(chunks));
|
||||||
}
|
}
|
||||||
|
|
||||||
MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||||
|
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||||
|
FilteringIterator filteringIterator = new FilteringIterator(mergingIterator,new IntervalOverlappingFilter(shard.getGenomeLocs()));
|
||||||
|
|
||||||
return applyDecoratingIterators(enableVerification,
|
return applyDecoratingIterators(enableVerification,
|
||||||
StingSAMIteratorAdapter.adapt(reads,iterator),
|
StingSAMIteratorAdapter.adapt(reads,filteringIterator),
|
||||||
reads.getDownsamplingFraction(),
|
reads.getDownsamplingFraction(),
|
||||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||||
reads.getSupplementalFilters());
|
reads.getSupplementalFilters());
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters reads out of a data stream that don't overlap with the given list of locations.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class IntervalOverlappingFilter implements SamRecordFilter {
|
||||||
|
/**
|
||||||
|
* The list of locations containing reads to keep.
|
||||||
|
*/
|
||||||
|
private final List<GenomeLoc> intervals;
|
||||||
|
|
||||||
|
public IntervalOverlappingFilter(List<GenomeLoc> intervals) {
|
||||||
|
this.intervals = intervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter out this record if it doesn't appear in the interval list.
|
||||||
|
* @param read The read to examine.
|
||||||
|
* @return True to filter the read out. False otherwise.
|
||||||
|
*/
|
||||||
|
public boolean filterOut(SAMRecord read) {
|
||||||
|
GenomeLoc readLocation = GenomeLocParser.createGenomeLoc(read);
|
||||||
|
for(GenomeLoc interval: intervals) {
|
||||||
|
if(interval.overlapsP(readLocation))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue