Refactoring; make a better home for the MalformedSAMFilteringIterator.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1194 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-07-08 16:54:20 +00:00
parent c78a72e775
commit d8fbb2b62c
6 changed files with 606 additions and 365 deletions

View File

@ -1,121 +1,122 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.utils.StingException;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import java.util.List;
import java.util.ArrayList;
import java.io.File;
/**
* User: hanna
* Date: Jun 23, 2009
* Time: 6:49:04 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* Maintains a pointer into a stream of reads. Tracks state between mapped and unmapped.
* For mapped, assumes that the user will query directly to where they want; closes the iterator after each use.
* For unmapped, assumes that the user will walk through the entire stream. Keeps the iterator open permanently.
* Abstract class that models a current state in some category of reads.
* @author hanna
* @version 0.1
*/
enum MappingType { MAPPED, UNMAPPED }
class ReadStreamPointer {
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(ReadStreamPointer.class);
abstract class ReadStreamPointer {
/**
* Describes the source of reads data.
*/
private final Reads sourceInfo;
protected final Reads sourceInfo;
/**
* Open handles to the reads info.
*/
private final SamFileHeaderMerger headerMerger;
protected final SamFileHeaderMerger headerMerger;
/**
* The (possibly merged) header for the input fileset.
*/
private final SAMFileHeader header;
/**
* In which bucket of reads does this pointer live?
*/
private MappingType streamPosition = MappingType.MAPPED;
/**
* A pointer to the current position of this iterator in the read stream.
*/
private PositionTrackingIterator unmappedIterator = null;
public ReadStreamPointer( Reads sourceInfo ) {
public ReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
this.sourceInfo = sourceInfo;
this.headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
this.header = this.headerMerger.getMergedHeader();
this.headerMerger = headerMerger;
}
/**
* Gets the header information for the read stream.
* @return Header information for the read stream.
* Can this pointer access the provided segment efficiently?
* @param segment Segment to test.
* @return True if it would be quick for this segment to access the given data.
* False if accessing this data would require some sort of reinitialization.
*/
public SAMFileHeader getHeader() {
return header;
}
public abstract boolean canAccessSegmentEfficiently(DataStreamSegment segment);
/**
* Can this pointer be efficiently used to access the given segment?
* @param segment Segment to inspect.
* @return True if the segment can be accessed efficiently, false otherwise.
* Close this resource, destroying all file handles.
*/
public boolean canAccessSegmentEfficiently( DataStreamSegment segment ) {
switch( streamPosition ) {
case MAPPED:
return true;
case UNMAPPED:
if( segment instanceof MappedStreamSegment )
return false;
else if( segment instanceof UnmappedStreamSegment ) {
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
return unmappedIterator.position <= unmappedSegment.position;
}
else
throw new StingException("Unsupported stream segment type: " + segment.getClass());
default:
throw new StingException("Pointer has hit illegal stream position; current position is " + streamPosition);
}
}
public void close() {
if( unmappedIterator != null )
unmappedIterator.close();
for (SAMFileReader reader : headerMerger.getReaders())
reader.close();
}
/**
* Remove an iterator from service.
* @param iterator The iterator to remove from service. Must not be null.
*/
public abstract void destroy( StingSAMIterator iterator );
/**
* Get a stream of all the reads that overlap a given segment.
* @param segment Segment to check for overlaps.
* @return An iterator over all reads overlapping the given segment.
*/
public abstract StingSAMIterator getReadsOverlapping( MappedStreamSegment segment );
/**
* Get a stream of all the reads that are completely contained by a given segment.
* The segment can be mapped or unmapped.
* @param segment Segment to check for containment.
* @return An iterator over all reads contained by the given segment.
*/
public abstract StingSAMIterator getReadsContainedBy( DataStreamSegment segment );
}
class MappedReadStreamPointer extends ReadStreamPointer {
public MappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
super( sourceInfo, headerMerger );
}
/**
* MappedReadStreamPointers can access any segment efficiently. Always return true.
* @param segment Segment to test.
* @return True.
*/
public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
return true;
}
/**
* {@inheritDoc}
*/
public void destroy( StingSAMIterator iterator ) {
iterator.close();
}
/**
* {@inheritDoc}
*/
@Override
public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
mergingIterator.queryOverlapping( segment.locus.getContig(),
@ -124,157 +125,98 @@ class ReadStreamPointer {
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}
/**
* {@inheritDoc}
*/
@Override
public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
if( segment instanceof MappedStreamSegment ) {
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
mergingIterator.queryContained( mappedSegment.locus.getContig(),
(int)mappedSegment.locus.getStart(),
(int)mappedSegment.locus.getStop());
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}
else if( segment instanceof UnmappedStreamSegment ) {
UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;
// If the stream position has not flipped over to the unmapped state, do some initialization.
if( streamPosition == MappingType.MAPPED ) {
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
mergingIterator.queryUnmappedReads();
unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L );
streamPosition = MappingType.UNMAPPED;
}
else {
if( streamPosition != MappingType.UNMAPPED || unmappedIterator == null )
throw new StingException("Illegal state: iterator has fetched all mapped reads but has not properly transition to unmapped reads");
// Force the iterator to the next pending position.
while(unmappedIterator.position < unmappedSegment.position)
unmappedIterator.next();
}
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size);
}
else
throw new StingException("Unable to handle stream segment of type" + segment.getClass());
if( !(segment instanceof MappedStreamSegment) )
throw new StingException("Trying to access unmapped content from a mapped read stream pointer");
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
mergingIterator.queryContained( mappedSegment.locus.getContig(),
(int)mappedSegment.locus.getStart(),
(int)mappedSegment.locus.getStop());
return StingSAMIteratorAdapter.adapt(sourceInfo,mergingIterator);
}
/**
* A private function that, given the internal file list, generates a merging construct for
* all available files.
* @param reads source information about the reads.
* @param SORT_ORDER sort order for the reads.
* @return a list of SAMFileReaders that represent the stored file names
* Convert a mapped read stream pointer to an unmapped read stream pointer, transferring ownership
* of the underlying file handles to the new container.
* After doing this conversion, the source MappedReadStreamPointer should not be used.
* @return A new UnmappedReadStreamPointer built from this pointer's source info and header merger.
*/
protected SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
throws SimpleDataSourceLoadException {
// right now this is pretty damn heavy, it copies the file list into a reader list every time
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
for (File f : reads.getReadsFiles()) {
SAMFileReader reader = new SAMFileReader(f, true);
reader.setValidationStringency(reads.getValidationStringency());
final SAMFileHeader header = reader.getFileHeader();
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
if (reader.getFileHeader().getReadGroups().size() < 1) {
//logger.warn("Setting header in reader " + f.getName());
SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName());
rec.setLibrary(f.getName());
rec.setSample(f.getName());
reader.getFileHeader().addReadGroup(rec);
}
lst.add(reader);
}
return new SamFileHeaderMerger(lst,SORT_ORDER,true);
}
private class PositionTrackingIterator implements StingSAMIterator {
/**
* Source information about the reads.
*/
private Reads sourceInfo;
/**
* The iterator being tracked.
*/
private CloseableIterator<SAMRecord> iterator;
/**
* Current position within the tracked iterator.
*/
private long position;
/**
* {@inheritDoc}
*/
public Reads getSourceInfo() {
return sourceInfo;
}
/**
* Retrieves the current position of the iterator. The 'current position' of the iterator is defined as
* the coordinate of the read that will be returned if next() is called.
* @return The current position of the iterator.
*/
public long getPosition() {
return position;
}
/**
* Create a new iterator wrapping the given position, assuming that the reader is <code>position</code> reads
* into the sequence.
* @param sourceInfo Information about where these reads came from.
* @param iterator Iterator to wraps.
* @param position Non-negative position where the iterator currently sits.
*/
public PositionTrackingIterator( Reads sourceInfo, CloseableIterator<SAMRecord> iterator, long position ) {
this.sourceInfo = sourceInfo;
this.iterator = iterator;
this.position = position;
}
/**
* {@inheritDoc}
*/
public boolean hasNext() {
return iterator.hasNext();
}
/**
* Try to get the next read in the list. If a next read is available, increment the position.
* @return next read in the list, if available.
*/
public SAMRecord next() {
try {
return iterator.next();
}
finally {
position++;
}
}
/**
* {@inheritDoc}
*/
public StingSAMIterator iterator() {
return this;
}
/**
* {@inheritDoc}
*/
public void close() {
// Position tracking iterators are constant through the life of the traversal. Don't close them.
// TODO: This is an artifact of the fact that pooled query iterators need to be closed, but pooled unmapped
// TODO: iterators must not be. Clean this up!
}
/**
* {@inheritDoc}
*/
public void remove() { throw new UnsupportedOperationException("Cannot remove from a StingSAMIterator"); }
public UnmappedReadStreamPointer toUnmappedReadStreamPointer() {
return new UnmappedReadStreamPointer( sourceInfo, headerMerger );
}
}
/**
 * A read stream pointer over the unmapped reads of the input.  The unmapped reads are walked as a
 * single forward-only stream: one position-tracking iterator is created up front and reused for
 * every segment requested from this pointer.
 */
class UnmappedReadStreamPointer extends ReadStreamPointer {
    /**
     * A pointer to the current position of this iterator in the read stream.
     * Assigned once in the constructor and reused for the life of this pointer.
     */
    private final PositionTrackingIterator unmappedIterator;

    /**
     * Create a pointer positioned at the first unmapped read.
     * @param sourceInfo Describes the source of reads data.
     * @param headerMerger Open handles to the reads info.
     */
    public UnmappedReadStreamPointer( Reads sourceInfo, SamFileHeaderMerger headerMerger ) {
        super( sourceInfo, headerMerger );
        MergingSamRecordIterator2 mergingIterator = new MergingSamRecordIterator2( headerMerger, sourceInfo );
        mergingIterator.queryUnmappedReads();
        unmappedIterator = new PositionTrackingIterator( sourceInfo, mergingIterator, 0L );
    }

    /**
     * UnmappedReadStreamPointers are streams and can therefore access 'future' reads in the file quickly,
     * but reads already seen are difficult to seek to.
     * @param segment Segment to test.
     * @return True if this DataStreamSegment is unmapped and starts at or after the current position.
     */
    @Override
    public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
        if( !(segment instanceof UnmappedStreamSegment) )
            return false;
        return unmappedIterator.getPosition() <= ((UnmappedStreamSegment)segment).position;
    }

    /**
     * Overlap queries make no sense for unmapped reads.
     * @param segment Ignored.
     * @return Never returns normally.
     * @throws UnsupportedOperationException always.
     */
    @Override
    public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
        throw new UnsupportedOperationException("Unable to determine overlapped reads of an unmapped segment");
    }

    /**
     * Get a bounded stream of the unmapped reads making up the given segment.
     * @param segment Segment to fetch; must be an UnmappedStreamSegment.
     * @return An iterator over at most <code>segment.size</code> reads starting at the segment's position.
     *         If the stream ends before that position is reached, the returned iterator is empty.
     * @throws StingException if the segment is not an UnmappedStreamSegment.
     */
    @Override
    public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
        if( !(segment instanceof UnmappedStreamSegment) )
            throw new StingException("Trying to access mapped content from an unmapped read stream pointer");

        UnmappedStreamSegment unmappedSegment = (UnmappedStreamSegment)segment;

        // Force the iterator to the next pending position.  Stop early if the stream is exhausted
        // rather than calling next() on an iterator that has nothing left to give.
        // NOTE(review): a segment that starts before the current position is not rewound here;
        // callers are expected to have checked canAccessSegmentEfficiently() first.
        while( unmappedIterator.getPosition() < unmappedSegment.position && unmappedIterator.hasNext() )
            unmappedIterator.next();

        return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(sourceInfo,unmappedIterator), unmappedSegment.size);
    }

    /**
     * Close the shared unmapped iterator, then let the superclass release the file handles.
     */
    @Override
    public void close() {
        // The iterator is final and assigned in the constructor, so it is never null here.
        unmappedIterator.close();
        super.close();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void destroy( StingSAMIterator iterator ) {
        // Don't destroy the iterator; reuse it.
    }
}

View File

@ -0,0 +1,131 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.StingException;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.picard.sam.SamFileHeaderMerger;
import java.util.List;
import java.util.ArrayList;
import java.io.File;
/**
* Represents a single stream of read data. Used to represent the state of the stream and determine
* whether the state of this resource is such that it can field the desired query.
* @author hanna
* @version 0.1
*/
class ReadStreamResource {
    /** our log, which we want to capture anything from this class */
    protected static Logger logger = Logger.getLogger(ReadStreamResource.class);

    /**
     * The (possibly merged) header for the input fileset.
     */
    private final SAMFileHeader header;

    /**
     * A pointer to the current location of the file.  Starts out as a MappedReadStreamPointer and is
     * swapped for an unmapped pointer the first time an unmapped segment is requested.
     */
    private ReadStreamPointer readStreamPointer = null;

    /**
     * Open readers for every reads file, merge their headers and start the stream in the mapped state.
     * @param sourceInfo Describes the source of reads data.
     */
    public ReadStreamResource( Reads sourceInfo ) {
        SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
        this.header = headerMerger.getMergedHeader();
        readStreamPointer = new MappedReadStreamPointer(sourceInfo, headerMerger);
    }

    /**
     * Gets the header information for the read stream.
     * @return Header information for the read stream.
     */
    public SAMFileHeader getHeader() {
        return header;
    }

    /**
     * Asks the current pointer whether it can access the given segment efficiently.
     * @param segment Segment to test.
     * @return Whatever the current pointer reports for this segment.
     */
    public boolean canAccessSegmentEfficiently(DataStreamSegment segment) {
        return readStreamPointer.canAccessSegmentEfficiently(segment);
    }

    /**
     * Close the current pointer.
     */
    public void close() {
        readStreamPointer.close();
    }

    /**
     * Hand an iterator back to the current pointer so it can be removed from service.
     * @param iterator The iterator to remove from service.
     */
    public void destroy( StingSAMIterator iterator ) {
        readStreamPointer.destroy(iterator);
    }

    /**
     * Get a stream of the reads contained by the given segment.  The first request for an unmapped
     * segment converts the mapped pointer into an unmapped one before delegating.
     * @param segment Segment to check for containment; mapped or unmapped.
     * @return An iterator over the reads contained by the given segment.
     */
    public StingSAMIterator getReadsContainedBy( DataStreamSegment segment ) {
        if( readStreamPointer instanceof MappedReadStreamPointer && segment instanceof UnmappedStreamSegment )
            readStreamPointer = ((MappedReadStreamPointer)readStreamPointer).toUnmappedReadStreamPointer();
        return readStreamPointer.getReadsContainedBy(segment);
    }

    /**
     * Get a stream of the reads overlapping the given segment.
     * @param segment Segment to check for overlaps.
     * @return An iterator over the reads overlapping the given segment.
     */
    public StingSAMIterator getReadsOverlapping( MappedStreamSegment segment ) {
        return readStreamPointer.getReadsOverlapping(segment);
    }

    /**
     * A private function that, given the internal file list, generates a merging construct for
     * all available files.  Any file whose header carries no read group is given one named after
     * the file.  If anything fails part-way through, every reader opened so far is closed before
     * the failure is rethrown.
     * @param reads source information about the reads.
     * @param SORT_ORDER sort order for the reads.
     * @return a header merger built over one SAMFileReader per reads file.
     * @throws SimpleDataSourceLoadException if the file cannot be opened.
     */
    private SamFileHeaderMerger createHeaderMerger( Reads reads, SAMFileHeader.SortOrder SORT_ORDER )
            throws SimpleDataSourceLoadException {
        // right now this is pretty damn heavy, it copies the file list into a reader list every time
        List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
        try {
            for (File f : reads.getReadsFiles()) {
                SAMFileReader reader = new SAMFileReader(f, true);
                // Track the reader straight away so it gets closed if anything below fails.
                lst.add(reader);
                reader.setValidationStringency(reads.getValidationStringency());

                final SAMFileHeader fileHeader = reader.getFileHeader();
                // Plain concatenation: the sort order is data, not a format string.
                logger.debug("Sort order is: " + fileHeader.getSortOrder());

                if (fileHeader.getReadGroups().size() < 1) {
                    SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName());
                    rec.setLibrary(f.getName());
                    rec.setSample(f.getName());
                    fileHeader.addReadGroup(rec);
                }
            }
            return new SamFileHeaderMerger(lst,SORT_ORDER,true);
        }
        catch (RuntimeException ex) {
            closeReaders(lst);
            throw ex;
        }
    }

    /**
     * Best-effort close of a batch of readers; used only while recovering from a load failure.
     * @param readers Readers to close.
     */
    private static void closeReaders( List<SAMFileReader> readers ) {
        for (SAMFileReader reader : readers) {
            try {
                reader.close();
            }
            catch (RuntimeException closeFailure) {
                // The original failure is what the caller needs to see; just record this one.
                logger.warn("Unable to close reader while recovering from a load failure", closeFailure);
            }
        }
    }
}

View File

@ -10,15 +10,12 @@ import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
@ -74,15 +71,20 @@ public class SAMDataSource implements SimpleDataSource {
private boolean intoUnmappedReads = false;
private int readsSeenAtLastPos = 0;
/**
* A histogram of exactly what reads were removed from the input stream and why.
*/
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
// A pool of SAM iterators.
private SAMIteratorPool iteratorPool = null;
private SAMResourcePool resourcePool = null;
/**
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
* @return Histogram of reads. Will not be null.
*/
public SAMReadViolationHistogram getViolationHistogram() {
return iteratorPool.getViolationHistogram();
return violations;
}
/**
@ -102,7 +104,7 @@ public class SAMDataSource implements SimpleDataSource {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
}
}
iteratorPool = new SAMIteratorPool(reads);
resourcePool = new SAMResourcePool(reads);
}
/**
@ -111,7 +113,7 @@ public class SAMDataSource implements SimpleDataSource {
* @return SAM file header.
*/
public SAMFileHeader getHeader() {
return iteratorPool.getHeader();
return resourcePool.getHeader();
}
/**
@ -123,7 +125,7 @@ public class SAMDataSource implements SimpleDataSource {
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
// setup the iterator pool if it's not setup
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
iteratorPool.setQueryOverlapping(queryOverlapping);
resourcePool.setQueryOverlapping(queryOverlapping);
StingSAMIterator iterator = null;
if (shard.getShardType() == Shard.ShardType.READ) {
@ -158,7 +160,7 @@ public class SAMDataSource implements SimpleDataSource {
* @return an iterator for that region
*/
private StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
return iteratorPool.iterator(new MappedStreamSegment(location));
return createIterator( new MappedStreamSegment(location) );
}
@ -177,11 +179,11 @@ public class SAMDataSource implements SimpleDataSource {
if (!intoUnmappedReads) {
if (lastReadPos == null) {
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
iter = createIterator(new MappedStreamSegment(lastReadPos));
return InitialReadIterator(shard.getSize(), iter);
} else {
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, createIterator(new MappedStreamSegment(lastReadPos))));
}
if (intoUnmappedReads && !includeUnmappedReads)
@ -214,10 +216,10 @@ public class SAMDataSource implements SimpleDataSource {
/**
* For unit testing, add a custom iterator pool.
*
* @param iteratorPool Custom mock iterator pool.
* @param resourcePool Custom mock resource pool.
*/
void setResourcePool( SAMIteratorPool iteratorPool ) {
this.iteratorPool = iteratorPool;
void setResourcePool( SAMResourcePool resourcePool ) {
this.resourcePool = resourcePool;
}
/**
@ -228,7 +230,7 @@ public class SAMDataSource implements SimpleDataSource {
* @return the bounded iterator that you can use to get the intervaled reads from
*/
StingSAMIterator toUnmappedReads( long readCount ) {
StingSAMIterator iter = iteratorPool.iterator(new UnmappedStreamSegment(readsTaken, readCount));
StingSAMIterator iter = createIterator(new UnmappedStreamSegment(readsTaken, readCount));
readsTaken += readCount;
return iter;
}
@ -277,7 +279,7 @@ public class SAMDataSource implements SimpleDataSource {
readsTaken = readCount;
readsSeenAtLastPos = 0;
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
CloseableIterator<SAMRecord> ret = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
CloseableIterator<SAMRecord> ret = createIterator(new MappedStreamSegment(lastReadPos));
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
}
}
@ -344,6 +346,16 @@ public class SAMDataSource implements SimpleDataSource {
return bound;
}
/**
 * Pulls a resource from the pool, asks it for the reads in the given segment, and wraps the
 * result so that malformed reads are screened out and tallied in the violation histogram.
 * @param segment Segment over which to gather reads.
 * @return An iterator over just the reads in the given segment.
 */
private StingSAMIterator createIterator( DataStreamSegment segment ) {
    final StingSAMIterator pooledReads = resourcePool.iterator(segment);
    final StingSAMIterator screenedReads = new MalformedSAMFilteringIterator( getHeader(), pooledReads, violations );
    return screenedReads;
}
/**
* Filter reads based on user-specified criteria.
*
@ -376,127 +388,10 @@ public class SAMDataSource implements SimpleDataSource {
private static class ZeroMappingQualityReadFilterFunc implements SamRecordFilter {
public boolean filterOut(SAMRecord rec) {
if (rec.getMappingQuality() == 0) {
//System.out.printf("Filtering 0 mapping quality read %s%n", rec.format());
return true;
} else {
return false;
}
return (rec.getMappingQuality() == 0);
}
}
}
/**
 * Maintains a pool of ReadStreamPointers and hands out iterators over them.  Every iterator
 * handed out is wrapped so that closing it returns the underlying pointer to the pool.
 */
class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator> {
    /** Source information about the reads. */
    protected Reads reads;

    /**
     * A histogram of exactly what reads were removed from the input stream and why.
     */
    private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();

    /** Is this a by-reads traversal or a by-locus? */
    protected boolean queryOverlapping;

    /** File header for the combined file. */
    protected SAMFileHeader header;

    /** our log, which we want to capture anything from this class */
    protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);

    /**
     * Create the pool and seed it with one stream pointer, which also supplies the merged header.
     * @param reads Source information about the reads.
     */
    public SAMIteratorPool( Reads reads ) {
        this.reads = reads;
        this.queryOverlapping = true;

        ReadStreamPointer streamPointer = createNewResource();
        this.header = streamPointer.getHeader();

        // Add this resource to the pool.
        this.addNewResource(streamPointer);
    }

    /** Get the combined header for all files in the iterator pool. */
    public SAMFileHeader getHeader() {
        return header;
    }

    /**
     * Returns a histogram of reads that were screened out, grouped by the nature of the error.
     * @return Histogram of reads. Will not be null.
     */
    public SAMReadViolationHistogram getViolationHistogram() {
        return violations;
    }

    /**
     * Pick the first pooled pointer that reports it can access the segment efficiently.
     * @param segment Segment about to be read.
     * @param pointers Candidate pointers.
     * @return The first suitable pointer, or null if none qualifies.
     */
    protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
        for (ReadStreamPointer pointer : pointers) {
            if (pointer.canAccessSegmentEfficiently(segment)) {
                return pointer;
            }
        }
        return null;
    }

    /**
     * Create a fresh stream pointer over the configured reads.
     * @return A new stream pointer.
     */
    protected ReadStreamPointer createNewResource() {
        return new ReadStreamPointer(reads);
    }

    /**
     * Build an iterator over the given segment from the given pointer, filtering malformed reads.
     * Uses an overlap query when queryOverlapping is set, and a containment query otherwise.
     * @param segment Segment to read.
     * @param streamPointer Pointer to read from.
     * @return A filtering iterator that releases itself back to the pool when closed.
     * @throws StingException if an overlap query is requested for a segment that is not mapped.
     */
    protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamPointer streamPointer ) {
        StingSAMIterator iterator = null;

        if (!queryOverlapping)
            iterator = streamPointer.getReadsContainedBy(segment);
        else {
            if (!( segment instanceof MappedStreamSegment ))
                throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
            iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
        }

        return new ReleasingIterator(new MalformedSAMFilteringIterator(header, iterator, violations));
    }

    /**
     * Close a pooled pointer.
     * @param resource Pointer to close.
     */
    protected void closeResource( ReadStreamPointer resource ) {
        resource.close();
    }

    /**
     * Wraps an iterator so that closing it also releases it back to the enclosing pool.
     */
    private class ReleasingIterator implements StingSAMIterator {
        /** The iterator to wrap. */
        private final StingSAMIterator wrappedIterator;

        public Reads getSourceInfo() {
            return wrappedIterator.getSourceInfo();
        }

        public ReleasingIterator( StingSAMIterator wrapped ) {
            this.wrappedIterator = wrapped;
        }

        public ReleasingIterator iterator() {
            return this;
        }

        public void remove() {
            throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
        }

        /**
         * Close the wrapped iterator and release this iterator back to the pool.  The release
         * happens even if closing the wrapped iterator fails, so the pool entry is not stranded.
         */
        public void close() {
            try {
                wrappedIterator.close();
            }
            finally {
                release(this);
            }
        }

        public boolean hasNext() {
            return wrappedIterator.hasNext();
        }

        public SAMRecord next() {
            return wrappedIterator.next();
        }
    }

    public boolean isQueryOverlapping() {
        return queryOverlapping;
    }

    public void setQueryOverlapping( boolean queryOverlapping ) {
        this.queryOverlapping = queryOverlapping;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.StingException;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import java.util.List;
/**
* Maintain a pool of resources of accessors to SAM read data. SAMFileReaders and
* headers are actually quite expensive to open, so this class manages the mechanics
* of keeping them open and reusing them.
* @author hanna
* @version 0.1
*/
class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator> {
    /** Source information about the reads. */
    protected Reads reads;

    /** Is this a by-reads traversal or a by-locus? */
    protected boolean queryOverlapping;

    /** File header for the combined file. */
    protected SAMFileHeader header;

    /** our log, which we want to capture anything from this class */
    protected static Logger logger = Logger.getLogger(SAMResourcePool.class);

    /**
     * Create the pool and seed it with one stream resource, which also supplies the merged header.
     * @param reads Source information about the reads.
     */
    public SAMResourcePool( Reads reads ) {
        this.reads = reads;
        this.queryOverlapping = true;

        ReadStreamResource streamResource = createNewResource();
        this.header = streamResource.getHeader();

        // Add this resource to the pool.
        this.addNewResource(streamResource);
    }

    /** Get the combined header for all files in the iterator pool. */
    public SAMFileHeader getHeader() {
        return header;
    }

    /**
     * Pick the first pooled resource that reports it can access the segment efficiently.
     * @param segment Segment about to be read.
     * @param resources Candidate resources.
     * @return The first suitable resource, or null if none qualifies.
     */
    protected ReadStreamResource selectBestExistingResource( DataStreamSegment segment, List<ReadStreamResource> resources ) {
        for (ReadStreamResource resource : resources) {
            if (resource.canAccessSegmentEfficiently(segment)) {
                return resource;
            }
        }
        return null;
    }

    /**
     * Create a fresh stream resource over the configured reads.
     * @return A new stream resource.
     */
    protected ReadStreamResource createNewResource() {
        return new ReadStreamResource(reads);
    }

    /**
     * Build an iterator over the given segment from the given resource.  Uses an overlap query
     * when queryOverlapping is set, and a containment query otherwise.
     * @param segment Segment to read.
     * @param streamResource Resource to read from.
     * @return An iterator that hands itself back to the pool when closed.
     * @throws StingException if an overlap query is requested for a segment that is not mapped.
     */
    protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamResource streamResource ) {
        StingSAMIterator iterator = null;

        if (!queryOverlapping)
            iterator = streamResource.getReadsContainedBy(segment);
        else {
            if (!( segment instanceof MappedStreamSegment ))
                throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
            iterator = streamResource.getReadsOverlapping((MappedStreamSegment) segment);
        }

        return new ReleasingIterator( streamResource, iterator );
    }

    /**
     * Close a pooled resource.
     * @param resource Resource to close.
     */
    protected void closeResource( ReadStreamResource resource ) {
        resource.close();
    }

    /**
     * Wraps an iterator so that closing it hands the wrapped iterator back to its resource and
     * releases this iterator back to the enclosing pool.
     */
    private class ReleasingIterator implements StingSAMIterator {
        /**
         * The resource acting as the source of the data.
         */
        private final ReadStreamResource resource;

        /**
         * The iterator to wrap.
         */
        private final StingSAMIterator wrappedIterator;

        public Reads getSourceInfo() {
            return wrappedIterator.getSourceInfo();
        }

        public ReleasingIterator( ReadStreamResource resource, StingSAMIterator wrapped ) {
            this.resource = resource;
            this.wrappedIterator = wrapped;
        }

        public ReleasingIterator iterator() {
            return this;
        }

        public void remove() {
            throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
        }

        /**
         * Let the resource dispose of the wrapped iterator, then release this iterator back to the
         * pool.  The release happens even if the resource fails, so the pool entry is not stranded.
         */
        public void close() {
            try {
                resource.destroy(wrappedIterator);
            }
            finally {
                release(this);
            }
        }

        public boolean hasNext() {
            return wrappedIterator.hasNext();
        }

        public SAMRecord next() {
            return wrappedIterator.next();
        }
    }

    public boolean isQueryOverlapping() {
        return queryOverlapping;
    }

    public void setQueryOverlapping( boolean queryOverlapping ) {
        this.queryOverlapping = queryOverlapping;
    }
}

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.Reads;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
/**
 * Iterates through a list of elements, tracking the number of elements it has seen.
 * The position starts at the value supplied to the constructor and grows by one for
 * every read successfully returned from {@link #next()}.
 *
 * @author hanna
 * @version 0.1
 */
public class PositionTrackingIterator implements StingSAMIterator {
    /**
     * Source information about the reads.
     */
    private final Reads sourceInfo;

    /**
     * The iterator being tracked.
     */
    private final CloseableIterator<SAMRecord> iterator;

    /**
     * Current position within the tracked iterator.
     */
    private long position;

    /**
     * Create a new iterator wrapping the given iterator, assuming that the reader is
     * <code>position</code> reads into the sequence.
     * @param sourceInfo Information about where these reads came from.
     * @param iterator Iterator to wrap.
     * @param position Non-negative position where the iterator currently sits.
     */
    public PositionTrackingIterator( Reads sourceInfo, CloseableIterator<SAMRecord> iterator, long position ) {
        this.sourceInfo = sourceInfo;
        this.iterator = iterator;
        this.position = position;
    }

    /**
     * {@inheritDoc}
     */
    public Reads getSourceInfo() {
        return sourceInfo;
    }

    /**
     * Retrieves the current position of the iterator.  The 'current position' of the iterator is defined as
     * the coordinate of the read that will be returned if next() is called.
     * @return The current position of the iterator.
     */
    public long getPosition() {
        return position;
    }

    /**
     * {@inheritDoc}
     */
    public boolean hasNext() {
        return iterator.hasNext();
    }

    /**
     * Try to get the next read in the list.  If a next read is available, increment the position.
     * If the wrapped iterator throws instead of returning a read, the position is left unchanged.
     * @return next read in the list, if available.
     */
    public SAMRecord next() {
        // Fetch first, count second: a failed fetch must not advance the position, otherwise
        // getPosition() would no longer match the number of reads actually handed out.
        final SAMRecord read = iterator.next();
        position++;
        return read;
    }

    /**
     * {@inheritDoc}
     */
    public StingSAMIterator iterator() {
        return this;
    }

    /**
     * Closes the wrapped iterator.
     */
    public void close() {
        iterator.close();
    }

    /**
     * Removal is not supported by this iterator.
     * @throws UnsupportedOperationException always.
     */
    public void remove() { throw new UnsupportedOperationException("Cannot remove from a StingSAMIterator"); }
}

View File

@ -41,7 +41,7 @@ import java.io.File;
/**
* use this to inject into SAMDataSource for testing
*/
public class ArtificialResourcePool extends SAMIteratorPool {
public class ArtificialResourcePool extends SAMResourcePool {
// How strict should we be with SAM/BAM parsing?
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;