Refactoring; make a better home for the MalformedReadFilteringIterator.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1194 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c78a72e775
commit
d8fbb2b62c
|
|
@ -1,121 +1,122 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.io.File;
|
|
||||||
/**
|
|
||||||
* User: hanna
|
|
||||||
* Date: Jun 23, 2009
|
|
||||||
* Time: 6:49:04 PM
|
|
||||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
||||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
||||||
* All rights are reserved.
|
|
||||||
*
|
|
||||||
* Users acknowledge that this software is supplied without any warranty or support.
|
|
||||||
* The Broad Institute is not responsible for its use, misuse, or
|
|
||||||
* functionality.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maintains a pointer into a stream of reads. Tracks state between mapped and unmapped.
|
* Abstract class that models a current state in some category of reads.
|
||||||
* For mapped, assumes that the user will query directly to where they want; closes the iterator after each use.
|
* @author hanna
|
||||||
* For unmapped, assumes that the user will walk through the entire stream. Keeps the iterator open permanently.
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
enum MappingType { MAPPED, UNMAPPED }
|
abstract class ReadStreamPointer {
|
||||||
|
|
||||||
class ReadStreamPointer {
|
|
||||||
/** our log, which we want to capture anything from this class */
|
|
||||||
protected static Logger logger = Logger.getLogger(ReadStreamPointer.class);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Describes the source of reads data.
|
* Describes the source of reads data.
|
||||||
*/
|
*/
|
||||||
private final Reads sourceInfo;
|
protected final Reads sourceInfo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Open handles to the reads info.
|
* Open handles to the reads info.
|
||||||
*/
|
*/
|
||||||
private final SamFileHeaderMerger headerMerger;
|
protected final SamFileHeaderMerger headerMerger;
|
||||||
|
|
||||||
/**
|
public ReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||||
* The (possibly merged) header for the input fileset.
|
|
||||||
*/
|
|
||||||
private final SAMFileHeader header;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* In which bucket of reads does this pointer live?
|
|
||||||
*/
|
|
||||||
private MappingType streamPosition = MappingType.MAPPED;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A pointer to the current position of this iterator in the read stream.
|
|
||||||
*/
|
|
||||||
private PositionTrackingIterator unmappedIterator = null;
|
|
||||||
|
|
||||||
public ReadStreamPointer( Reads sourceInfo ) {
|
|
||||||
this.sourceInfo = sourceInfo;
|
this.sourceInfo = sourceInfo;
|
||||||
this.headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
|
this.headerMerger = headerMerger;
|
||||||
this.header = this.headerMerger.getMergedHeader();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the header information for the read stream.
|
* Can this pointer access the provided segment efficiently?
|
||||||
* @return Header information for the read stream.
|
* @param segment Segment to test.
|
||||||
|
* @return True if it would be quick for this segment to access the given data.
|
||||||
|
* False if accessing this data would require some sort of reinitialization.
|
||||||
*/
|
*/
|
||||||
public SAMFileHeader getHeader() {
|
public abstract boolean canAccessSegmentEfficiently(DataStreamSegment segment);
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Can this pointer be efficiently used to access the given segment?
|
* Close this resource, destroying all file handles.
|
||||||
* @param segment Segment to inspect.
|
|
||||||
* @return True if the segment can be accessed efficiently, false otherwise.
|
|
||||||
*/
|
*/
|
||||||
public boolean canAccessSegmentEfficiently( DataStreamSegment segment ) {
|
|
||||||
switch( streamPosition ) {
|
|
||||||
case MAPPED:
|
|
||||||
return true;
|
|
||||||
case UNMAPPED:
|
|
||||||
if( segment instanceof MappedStreamSegment )
|
|
||||||
return false;
|
|
||||||
else if( segment instanceof UnmappedStreamSegment ) {
|
|
||||||
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
|
|
||||||
return unmappedIterator.position <= unmappedSegment.position;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new StingException("Unsupported stream segment type: " + segment.getClass());
|
|
||||||
default:
|
|
||||||
throw new StingException("Pointer has hit illegal stream position; current position is " + streamPosition);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
if( unmappedIterator != null )
|
|
||||||
unmappedIterator.close();
|
|
||||||
for (SAMFileReader reader : headerMerger.getReaders())
|
for (SAMFileReader reader : headerMerger.getReaders())
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove an iterator from service.
|
||||||
|
* @param iterator The iterator to remove from service. Must not be null.
|
||||||
|
*/
|
||||||
|
public abstract void destroy( StingSAMIterator iterator );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a stream of all the reads that overlap a given segment.
|
* Get a stream of all the reads that overlap a given segment.
|
||||||
* @param segment Segment to check for overlaps.
|
* @param segment Segment to check for overlaps.
|
||||||
* @return An iterator over all reads overlapping the given segment.
|
* @return An iterator over all reads overlapping the given segment.
|
||||||
*/
|
*/
|
||||||
|
public abstract StingSAMIterator getReadsOverlapping( MappedStreamSegment segment );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a stream of all the reads that are completely contained by a given segment.
|
||||||
|
* The segment can be mapped or unmapped.
|
||||||
|
* @param segment Segment to check for containment.
|
||||||
|
* @return An iterator over all reads contained by the given segment.
|
||||||
|
*/
|
||||||
|
public abstract StingSAMIterator getReadsContainedBy( DataStreamSegment segment );
|
||||||
|
}
|
||||||
|
|
||||||
|
class MappedReadStreamPointer extends ReadStreamPointer {
|
||||||
|
|
||||||
|
public MappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||||
|
super( sourceInfo, headerMerger );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MappedReadStreamPointers can access any segment efficiently. Always return true.
|
||||||
|
* @param segment Segment to test.
|
||||||
|
* @return True.
|
||||||
|
*/
|
||||||
|
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public void destroy( StingSAMIterator iterator ) {
|
||||||
|
iterator.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
|
public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
|
||||||
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
||||||
mergingIterator.queryOverlapping( segment.locus.getContig(),
|
mergingIterator.queryOverlapping( segment.locus.getContig(),
|
||||||
|
|
@ -124,8 +125,13 @@ class ReadStreamPointer {
|
||||||
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
|
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||||
if( segment instanceof MappedStreamSegment ) {
|
if( !(segment instanceof MappedStreamSegment) )
|
||||||
|
throw new StingException("Trying to access unmapped content from a mapped read stream pointer");
|
||||||
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
||||||
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
||||||
mergingIterator.queryContained( mappedSegment.locus.getContig(),
|
mergingIterator.queryContained( mappedSegment.locus.getContig(),
|
||||||
|
|
@ -133,148 +139,84 @@ class ReadStreamPointer {
|
||||||
(int)mappedSegment.locus.getStop());
|
(int)mappedSegment.locus.getStop());
|
||||||
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
|
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
|
||||||
}
|
}
|
||||||
else if( segment instanceof UnmappedStreamSegment ) {
|
|
||||||
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
|
|
||||||
|
|
||||||
// If the stream position has not flipped over to the unmapped state, do some initialization.
|
/**
|
||||||
if( streamPosition == MappingType.MAPPED ) {
|
* Convert a mapped read stream pointer to an unmapped read stream pointer, transferring ownership
|
||||||
|
* of the underlying file handles to the new container.
|
||||||
|
* After doing this conversion, the source MappedReadStreamPointer should not be used.
|
||||||
|
* @return A new UnmappedReadStreamPointer constructed from this pointer's source info and header merger.
|
||||||
|
*/
|
||||||
|
public UnmappedReadStreamPointer toUnmappedReadStreamPointer() {
|
||||||
|
return new UnmappedReadStreamPointer( sourceInfo, headerMerger );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class UnmappedReadStreamPointer extends ReadStreamPointer {
|
||||||
|
/**
|
||||||
|
* A pointer to the current position of this iterator in the read stream.
|
||||||
|
*/
|
||||||
|
private PositionTrackingIterator unmappedIterator = null;
|
||||||
|
|
||||||
|
public UnmappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
|
||||||
|
super( sourceInfo, headerMerger );
|
||||||
|
|
||||||
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
|
||||||
mergingIterator.queryUnmappedReads();
|
mergingIterator.queryUnmappedReads();
|
||||||
unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L );
|
unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L );
|
||||||
streamPosition = MappingType.UNMAPPED;
|
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
if( streamPosition != MappingType.UNMAPPED || unmappedIterator == null )
|
/**
|
||||||
throw new StingException("Illegal state: iterator has fetched all mapped reads but has not properly transition to unmapped reads");
|
* UnmappedReadStreamPointers are streams and can therefore access 'future' reads in the file quickly,
|
||||||
|
* but reads already seen are difficult to seek to.
|
||||||
|
* @param segment Segment to test.
|
||||||
|
* @return True if this DataStreamSegment follows the current position.
|
||||||
|
*/
|
||||||
|
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||||
|
if( !(segment instanceof UnmappedStreamSegment) )
|
||||||
|
return false;
|
||||||
|
return unmappedIterator.getPosition() <= ((UnmappedStreamSegment)segment).position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
|
||||||
|
throw new UnsupportedOperationException("Unable to determine overlapped reads of an unmapped segment");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||||
|
if( !(segment instanceof UnmappedStreamSegment) )
|
||||||
|
throw new StingException("Trying to access mapped content from an unmapped read stream pointer");
|
||||||
|
|
||||||
|
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
|
||||||
|
|
||||||
// Force the iterator to the next pending position.
|
// Force the iterator to the next pending position.
|
||||||
while(unmappedIterator.position < unmappedSegment.position)
|
while(unmappedIterator.getPosition() < unmappedSegment.position)
|
||||||
unmappedIterator.next();
|
unmappedIterator.next();
|
||||||
}
|
|
||||||
|
|
||||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size);
|
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
throw new StingException("Unable to handle stream segment of type" + segment.getClass());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A private function that, given the internal file list, generates a merging construct for
|
|
||||||
* all available files.
|
|
||||||
* @param reads source information about the reads.
|
|
||||||
* @param SORT_ORDER sort order for the reads.
|
|
||||||
* @return a list of SAMFileReaders that represent the stored file names
|
|
||||||
*/
|
|
||||||
protected SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
|
|
||||||
throws SimpleDataSourceLoadException {
|
|
||||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
|
||||||
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
|
||||||
for (File f : reads.getReadsFiles()) {
|
|
||||||
SAMFileReader reader = new SAMFileReader(f, true);
|
|
||||||
reader.setValidationStringency(reads.getValidationStringency());
|
|
||||||
|
|
||||||
final SAMFileHeader header = reader.getFileHeader();
|
|
||||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
|
||||||
|
|
||||||
if (reader.getFileHeader().getReadGroups().size() < 1) {
|
|
||||||
//logger.warn("Setting header in reader " + f.getName());
|
|
||||||
SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName());
|
|
||||||
rec.setLibrary(f.getName());
|
|
||||||
rec.setSample(f.getName());
|
|
||||||
|
|
||||||
reader.getFileHeader().addReadGroup(rec);
|
|
||||||
}
|
|
||||||
|
|
||||||
lst.add(reader);
|
|
||||||
}
|
|
||||||
return new SamFileHeaderMerger(lst,SORT_ORDER,true);
|
|
||||||
}
|
|
||||||
|
|
||||||
private class PositionTrackingIterator implements StingSAMIterator {
|
|
||||||
/**
|
|
||||||
* Source information about the reads.
|
|
||||||
*/
|
|
||||||
private Reads sourceInfo;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The iterator being tracked.
|
|
||||||
*/
|
|
||||||
private CloseableIterator<SAMRecord> iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Current position within the tracked iterator.
|
|
||||||
*/
|
|
||||||
private long position;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public Reads getSourceInfo() {
|
|
||||||
return sourceInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the current position of the iterator. The 'current position' of the iterator is defined as
|
|
||||||
* the coordinate of the read that will be returned if next() is called.
|
|
||||||
* @return The current position of the iterator.
|
|
||||||
*/
|
|
||||||
public long getPosition() {
|
|
||||||
return position;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new iterator wrapping the given position, assuming that the reader is <code>position</code> reads
|
|
||||||
* into the sequence.
|
|
||||||
* @param sourceInfo Information about where these reads came from.
|
|
||||||
* @param iterator Iterator to wrap.
|
|
||||||
* @param position Non-negative position where the iterator currently sits.
|
|
||||||
*/
|
|
||||||
public PositionTrackingIterator( Reads sourceInfo, CloseableIterator<SAMRecord> iterator, long position ) {
|
|
||||||
this.sourceInfo = sourceInfo;
|
|
||||||
this.iterator = iterator;
|
|
||||||
this.position = position;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Try to get the next read in the list. If a next read is available, increment the position.
|
|
||||||
* @return next read in the list, if available.
|
|
||||||
*/
|
|
||||||
public SAMRecord next() {
|
|
||||||
try {
|
|
||||||
return iterator.next();
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
position++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
public StingSAMIterator iterator() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
// Position tracking iterators are constant through the life of the traversal. Don't close them.
|
if( unmappedIterator != null )
|
||||||
// TODO: This is an artifact of the fact that pooled query iterators need to be closed, but pooled unmapped
|
unmappedIterator.close();
|
||||||
// TODO: iterators must not be. Clean this up!
|
super.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a StingSAMIterator"); }
|
public void destroy( StingSAMIterator iterator ) {
|
||||||
|
// Don't destroy the iterator; reuse it.
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,131 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a single stream of read data. Used to represent the state of the stream and determine
|
||||||
|
* whether the state of this resource is such that it can field the desired query.
|
||||||
|
* @author hanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
class ReadStreamResource {
|
||||||
|
/** our log, which we want to capture anything from this class */
|
||||||
|
protected static Logger logger = Logger.getLogger(ReadStreamResource.class); // log under this class's own category
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The (possibly merged) header for the input fileset.
|
||||||
|
*/
|
||||||
|
private final SAMFileHeader header;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A pointer to the current location of the file.
|
||||||
|
*/
|
||||||
|
private ReadStreamPointer readStreamPointer = null;
|
||||||
|
|
||||||
|
public ReadStreamResource( Reads sourceInfo ) {
|
||||||
|
SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
|
||||||
|
|
||||||
|
this.header = headerMerger.getMergedHeader();
|
||||||
|
readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the header information for the read stream.
|
||||||
|
* @return Header information for the read stream.
|
||||||
|
*/
|
||||||
|
public SAMFileHeader getHeader() {
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
|
||||||
|
return readStreamPointer.canAccessSegmentEfficiently(segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
readStreamPointer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void destroy( StingSAMIterator iterator ) {
|
||||||
|
readStreamPointer.destroy(iterator);
|
||||||
|
}
|
||||||
|
|
||||||
|
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
|
||||||
|
if( readStreamPointer instanceof MappedReadStreamPointer && segment instanceof UnmappedStreamSegment )
|
||||||
|
readStreamPointer = ((MappedReadStreamPointer)readStreamPointer).toUnmappedReadStreamPointer();
|
||||||
|
return readStreamPointer.getReadsContainedBy(segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
|
||||||
|
return readStreamPointer.getReadsOverlapping(segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A private function that, given the internal file list, generates a merging construct for
|
||||||
|
* all available files.
|
||||||
|
* @param reads source information about the reads.
|
||||||
|
* @param SORT_ORDER sort order for the reads.
|
||||||
|
* @return a SamFileHeaderMerger built over one SAMFileReader per reads file
|
||||||
|
* @throws SimpleDataSourceLoadException if the file cannot be opened.
|
||||||
|
*/
|
||||||
|
private SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
|
||||||
|
throws SimpleDataSourceLoadException {
|
||||||
|
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||||
|
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
||||||
|
for (File f : reads.getReadsFiles()) {
|
||||||
|
SAMFileReader reader = new SAMFileReader(f, true);
|
||||||
|
reader.setValidationStringency(reads.getValidationStringency());
|
||||||
|
|
||||||
|
final SAMFileHeader header = reader.getFileHeader();
|
||||||
|
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||||
|
|
||||||
|
if (reader.getFileHeader().getReadGroups().size() < 1) {
|
||||||
|
//logger.warn("Setting header in reader " + f.getName());
|
||||||
|
SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName());
|
||||||
|
rec.setLibrary(f.getName());
|
||||||
|
rec.setSample(f.getName());
|
||||||
|
|
||||||
|
reader.getFileHeader().addReadGroup(rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
lst.add(reader);
|
||||||
|
}
|
||||||
|
return new SamFileHeaderMerger(lst,SORT_ORDER,true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,15 +10,12 @@ import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
|
|
||||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -74,15 +71,20 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
private boolean intoUnmappedReads = false;
|
private boolean intoUnmappedReads = false;
|
||||||
private int readsSeenAtLastPos = 0;
|
private int readsSeenAtLastPos = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A histogram of exactly what reads were removed from the input stream and why.
|
||||||
|
*/
|
||||||
|
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
||||||
|
|
||||||
// A pool of SAM iterators.
|
// A pool of SAM iterators.
|
||||||
private SAMIteratorPool iteratorPool = null;
|
private SAMResourcePool resourcePool = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
||||||
* @return Histogram of reads. Will not be null.
|
* @return Histogram of reads. Will not be null.
|
||||||
*/
|
*/
|
||||||
public SAMReadViolationHistogram getViolationHistogram() {
|
public SAMReadViolationHistogram getViolationHistogram() {
|
||||||
return iteratorPool.getViolationHistogram();
|
return violations;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -102,7 +104,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
iteratorPool = new SAMIteratorPool(reads);
|
resourcePool = new SAMResourcePool(reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -111,7 +113,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return SAM file header.
|
* @return SAM file header.
|
||||||
*/
|
*/
|
||||||
public SAMFileHeader getHeader() {
|
public SAMFileHeader getHeader() {
|
||||||
return iteratorPool.getHeader();
|
return resourcePool.getHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -123,7 +125,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
||||||
// setup the iterator pool if it's not setup
|
// setup the iterator pool if it's not setup
|
||||||
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
|
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
|
||||||
iteratorPool.setQueryOverlapping(queryOverlapping);
|
resourcePool.setQueryOverlapping(queryOverlapping);
|
||||||
|
|
||||||
StingSAMIterator iterator = null;
|
StingSAMIterator iterator = null;
|
||||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||||
|
|
@ -158,7 +160,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
private StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
private StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
||||||
return iteratorPool.iterator(new MappedStreamSegment(location));
|
return createIterator( new MappedStreamSegment(location) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -177,11 +179,11 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
if (!intoUnmappedReads) {
|
if (!intoUnmappedReads) {
|
||||||
if (lastReadPos == null) {
|
if (lastReadPos == null) {
|
||||||
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||||
iter = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
|
iter = createIterator(new MappedStreamSegment(lastReadPos));
|
||||||
return InitialReadIterator(shard.getSize(), iter);
|
return InitialReadIterator(shard.getSize(), iter);
|
||||||
} else {
|
} else {
|
||||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||||
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
|
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (intoUnmappedReads && !includeUnmappedReads)
|
if (intoUnmappedReads && !includeUnmappedReads)
|
||||||
|
|
@ -214,10 +216,10 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/**
|
/**
|
||||||
* For unit testing, add a custom iterator pool.
|
* For unit testing, add a custom resource pool.
|
||||||
*
|
*
|
||||||
* @param iteratorPool Custom mock iterator pool.
|
* @param resourcePool Custom mock resource pool.
|
||||||
*/
|
*/
|
||||||
void setResourcePool( SAMIteratorPool iteratorPool ) {
|
void setResourcePool( SAMResourcePool resourcePool ) {
|
||||||
this.iteratorPool = iteratorPool;
|
this.resourcePool = resourcePool;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -228,7 +230,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return the bounded iterator that you can use to get the intervaled reads from
|
* @return the bounded iterator that you can use to get the intervaled reads from
|
||||||
*/
|
*/
|
||||||
StingSAMIterator toUnmappedReads( long readCount ) {
|
StingSAMIterator toUnmappedReads( long readCount ) {
|
||||||
StingSAMIterator iter = iteratorPool.iterator(new UnmappedStreamSegment(readsTaken, readCount));
|
StingSAMIterator iter = createIterator(new UnmappedStreamSegment(readsTaken, readCount));
|
||||||
readsTaken += readCount;
|
readsTaken += readCount;
|
||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
@ -277,7 +279,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
readsTaken = readCount;
|
readsTaken = readCount;
|
||||||
readsSeenAtLastPos = 0;
|
readsSeenAtLastPos = 0;
|
||||||
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
|
||||||
CloseableIterator<SAMRecord> ret = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
|
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
|
||||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
|
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -344,6 +346,16 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
return bound;
|
return bound;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an iterator over the selected segment, from a resource pulled from the pool.
|
||||||
|
* @param segment Segment over which to gather reads.
|
||||||
|
* @return An iterator over just the reads in the given segment.
|
||||||
|
*/
|
||||||
|
private StingSAMIterator createIterator( DataStreamSegment segment ) {
|
||||||
|
StingSAMIterator iterator = resourcePool.iterator(segment);
|
||||||
|
return new MalformedSAMFilteringIterator( getHeader(), iterator, violations );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter reads based on user-specified criteria.
|
* Filter reads based on user-specified criteria.
|
||||||
*
|
*
|
||||||
|
|
@ -376,127 +388,10 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
private static class ZeroMappingQualityReadFilterFunc implements SamRecordFilter {
|
private static class ZeroMappingQualityReadFilterFunc implements SamRecordFilter {
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
if (rec.getMappingQuality() == 0) {
|
return (rec.getMappingQuality() == 0);
|
||||||
//System.out.printf("Filtering 0 mapping quality read %s%n", rec.format());
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator> {
|
|
||||||
/** Source information about the reads. */
|
|
||||||
protected Reads reads;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A histogram of exactly what reads were removed from the input stream and why.
|
|
||||||
*/
|
|
||||||
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
|
||||||
|
|
||||||
/** Is this a by-reads traversal or a by-locus? */
|
|
||||||
protected boolean queryOverlapping;
|
|
||||||
|
|
||||||
/** File header for the combined file. */
|
|
||||||
protected SAMFileHeader header;
|
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
|
||||||
protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
|
|
||||||
|
|
||||||
public SAMIteratorPool( Reads reads ) {
|
|
||||||
this.reads = reads;
|
|
||||||
this.queryOverlapping = true;
|
|
||||||
|
|
||||||
ReadStreamPointer streamPointer = createNewResource();
|
|
||||||
this.header = streamPointer.getHeader();
|
|
||||||
// Add this resource to the pool.
|
|
||||||
this.addNewResource(streamPointer);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get the combined header for all files in the iterator pool. */
|
|
||||||
public SAMFileHeader getHeader() {
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
|
||||||
* @return Histogram of reads. Will not be null.
|
|
||||||
*/
|
|
||||||
public SAMReadViolationHistogram getViolationHistogram() {
|
|
||||||
return violations;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
|
||||||
for (ReadStreamPointer pointer : pointers) {
|
|
||||||
if (pointer.canAccessSegmentEfficiently(segment)) {
|
|
||||||
return pointer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ReadStreamPointer createNewResource() {
|
|
||||||
return new ReadStreamPointer(reads);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamPointer streamPointer ) {
|
|
||||||
StingSAMIterator iterator = null;
|
|
||||||
|
|
||||||
if (!queryOverlapping)
|
|
||||||
iterator = streamPointer.getReadsContainedBy(segment);
|
|
||||||
else {
|
|
||||||
if (!( segment instanceof MappedStreamSegment ))
|
|
||||||
throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
|
|
||||||
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReleasingIterator(new MalformedSAMFilteringIterator(header, iterator, violations));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void closeResource( ReadStreamPointer resource ) {
|
|
||||||
resource.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private class ReleasingIterator implements StingSAMIterator {
|
|
||||||
private final StingSAMIterator wrappedIterator;
|
|
||||||
|
|
||||||
public Reads getSourceInfo() {
|
|
||||||
return wrappedIterator.getSourceInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReleasingIterator( StingSAMIterator wrapped ) {
|
|
||||||
this.wrappedIterator = wrapped;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReleasingIterator iterator() {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
wrappedIterator.close();
|
|
||||||
release(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return wrappedIterator.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord next() {
|
|
||||||
return wrappedIterator.next();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isQueryOverlapping() {
|
|
||||||
return queryOverlapping;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setQueryOverlapping( boolean queryOverlapping ) {
|
|
||||||
this.queryOverlapping = queryOverlapping;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,153 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maintains a pool of resources that provide access to SAM read data. SAMFileReaders and
|
||||||
|
* headers are actually quite expensive to open, so this class manages the mechanics
|
||||||
|
* of keeping them open and reusing them.
|
||||||
|
* @author hanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator> {
|
||||||
|
/** Source information about the reads. */
|
||||||
|
protected Reads reads;
|
||||||
|
|
||||||
|
/** Is this a by-reads traversal or a by-locus? */
|
||||||
|
protected boolean queryOverlapping;
|
||||||
|
|
||||||
|
/** File header for the combined file. */
|
||||||
|
protected SAMFileHeader header;
|
||||||
|
|
||||||
|
/** our log, which we want to capture anything from this class */
|
||||||
|
protected static Logger logger = Logger.getLogger(SAMResourcePool.class);
|
||||||
|
|
||||||
|
public SAMResourcePool( Reads reads ) {
|
||||||
|
this.reads = reads;
|
||||||
|
this.queryOverlapping = true;
|
||||||
|
|
||||||
|
ReadStreamResource streamResource = createNewResource();
|
||||||
|
this.header = streamResource.getHeader();
|
||||||
|
// Add this resource to the pool.
|
||||||
|
this.addNewResource(streamResource);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the combined header for all files in the resource pool. */
|
||||||
|
public SAMFileHeader getHeader() {
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ReadStreamResource selectBestExistingResource( DataStreamSegment segment, List<ReadStreamResource> resources ) {
|
||||||
|
for (ReadStreamResource resource : resources) {
|
||||||
|
if (resource.canAccessSegmentEfficiently(segment)) {
|
||||||
|
return resource;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ReadStreamResource createNewResource() {
|
||||||
|
return new ReadStreamResource(reads);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamResource streamResource ) {
|
||||||
|
StingSAMIterator iterator = null;
|
||||||
|
|
||||||
|
if (!queryOverlapping)
|
||||||
|
iterator = streamResource.getReadsContainedBy(segment);
|
||||||
|
else {
|
||||||
|
if (!( segment instanceof MappedStreamSegment ))
|
||||||
|
throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
|
||||||
|
iterator = streamResource.getReadsOverlapping((MappedStreamSegment) segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ReleasingIterator( streamResource, iterator );
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void closeResource( ReadStreamResource resource ) {
|
||||||
|
resource.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ReleasingIterator implements StingSAMIterator {
|
||||||
|
/**
|
||||||
|
* The resource acting as the source of the data.
|
||||||
|
*/
|
||||||
|
private final ReadStreamResource resource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The iterator to wrap.
|
||||||
|
*/
|
||||||
|
private final StingSAMIterator wrappedIterator;
|
||||||
|
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
return wrappedIterator.getSourceInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReleasingIterator( ReadStreamResource resource, StingSAMIterator wrapped ) {
|
||||||
|
this.resource = resource;
|
||||||
|
this.wrappedIterator = wrapped;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReleasingIterator iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
resource.destroy(wrappedIterator);
|
||||||
|
release(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return wrappedIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord next() {
|
||||||
|
return wrappedIterator.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isQueryOverlapping() {
|
||||||
|
return queryOverlapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setQueryOverlapping( boolean queryOverlapping ) {
|
||||||
|
this.queryOverlapping = queryOverlapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,120 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterates through a list of elements, tracking the number of elements it has seen.
|
||||||
|
* @author hanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class PositionTrackingIterator implements StingSAMIterator {
|
||||||
|
/**
|
||||||
|
* Source information about the reads.
|
||||||
|
*/
|
||||||
|
private Reads sourceInfo;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The iterator being tracked.
|
||||||
|
*/
|
||||||
|
private CloseableIterator<SAMRecord> iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Current position within the tracked iterator.
|
||||||
|
*/
|
||||||
|
private long position;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public Reads getSourceInfo() {
|
||||||
|
return sourceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the current position of the iterator. The 'current position' of the iterator is defined as
|
||||||
|
* the coordinate of the read that will be returned if next() is called.
|
||||||
|
* @return The current position of the iterator.
|
||||||
|
*/
|
||||||
|
public long getPosition() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new iterator wrapping the given iterator, assuming that the reader is <code>position</code> reads
|
||||||
|
* into the sequence.
|
||||||
|
* @param sourceInfo Information about where these reads came from.
|
||||||
|
* @param iterator Iterator to wrap.
|
||||||
|
* @param position Non-negative position where the iterator currently sits.
|
||||||
|
*/
|
||||||
|
public PositionTrackingIterator( Reads sourceInfo, CloseableIterator<SAMRecord> iterator, long position ) {
|
||||||
|
this.sourceInfo = sourceInfo;
|
||||||
|
this.iterator = iterator;
|
||||||
|
this.position = position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public boolean hasNext() {
|
||||||
|
return iterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to get the next read in the list. If a next read is available, increment the position.
|
||||||
|
* @return next read in the list, if available.
|
||||||
|
*/
|
||||||
|
public SAMRecord next() {
|
||||||
|
try {
|
||||||
|
return iterator.next();
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public StingSAMIterator iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
iterator.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from a StingSAMIterator"); }
|
||||||
|
}
|
||||||
|
|
@ -41,7 +41,7 @@ import java.io.File;
|
||||||
/**
|
/**
|
||||||
* use this to inject into SAMDataSource for testing
|
* use this to inject into SAMDataSource for testing
|
||||||
*/
|
*/
|
||||||
public class ArtificialResourcePool extends SAMIteratorPool {
|
public class ArtificialResourcePool extends SAMResourcePool {
|
||||||
// How strict should we be with SAM/BAM parsing?
|
// How strict should we be with SAM/BAM parsing?
|
||||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue