Mediocre implementation of reader pooling within the SAM data source. Will fix this week.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2915 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
74a5223b11
commit
104f4f7383
|
|
@ -38,6 +38,8 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
* Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation.
|
||||
*/
|
||||
public class SAMFileReader2 extends SAMFileReader {
|
||||
private final File sourceFile;
|
||||
|
||||
/**
|
||||
* Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file
|
||||
*/
|
||||
|
|
@ -66,6 +68,7 @@ public class SAMFileReader2 extends SAMFileReader {
|
|||
*/
|
||||
public SAMFileReader2(final File file, final File indexFile, final boolean eagerDecode){
|
||||
super(file,indexFile,eagerDecode);
|
||||
this.sourceFile = file;
|
||||
close();
|
||||
|
||||
try {
|
||||
|
|
@ -181,6 +184,17 @@ public class SAMFileReader2 extends SAMFileReader {
|
|||
catch(NoSuchMethodException ex) {
|
||||
throw new StingException("Unable to run method findIndexFile",ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null) return false;
|
||||
if(!(other instanceof SAMFileReader2)) return false;
|
||||
return this.sourceFile.equals(((SAMFileReader2)other).sourceFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return sourceFile.hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,8 +133,19 @@ public class LocusReferenceView extends ReferenceView {
|
|||
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the bounds of this shard, trimming the bounds so that they match the reference.
|
||||
* @param provider Provider covering the appropriate locus.
|
||||
*/
|
||||
private void initializeBounds(ShardDataProvider provider) {
|
||||
bounds = provider.getLocus();
|
||||
if(provider.getLocus() != null) {
|
||||
long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength();
|
||||
bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(),
|
||||
Math.max(provider.getLocus().getStart(),1),
|
||||
Math.min(provider.getLocus().getStop(),sequenceLength));
|
||||
}
|
||||
else
|
||||
bounds = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -24,7 +24,15 @@ import java.io.File;
|
|||
*/
|
||||
public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||
|
||||
private final SamFileHeaderMerger headerMerger;
|
||||
/**
|
||||
* A collection of readers driving the merging process.
|
||||
*/
|
||||
private final SAMResourcePool resourcePool;
|
||||
|
||||
/**
|
||||
* The merged header.
|
||||
*/
|
||||
private final SAMFileHeader header;
|
||||
|
||||
/**
|
||||
* Create a new block-aware SAM data source given the supplied read metadata.
|
||||
|
|
@ -35,22 +43,33 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
|
||||
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
|
||||
|
||||
Collection<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||
for(File readsFile: reads.getReadsFiles()) {
|
||||
SAMFileReader2 reader = new SAMFileReader2(readsFile);
|
||||
reader.setValidationStringency(reads.getValidationStringency());
|
||||
readers.add(reader);
|
||||
}
|
||||
|
||||
this.headerMerger = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true);
|
||||
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
header = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true).getMergedHeader();
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
|
||||
public boolean hasIndex() {
|
||||
for(SAMFileReader reader: headerMerger.getReaders()) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
return hasIndex(readers);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Report whether a given collection of SAM file readers is indexed.
|
||||
* @param readers The collection of readers.
|
||||
* @return True if the given collection of readers is indexed.
|
||||
*/
|
||||
private boolean hasIndex(Collection<SAMFileReader> readers) {
|
||||
for(SAMFileReader reader: readers) {
|
||||
if(!reader.hasIndex())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -59,12 +78,19 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A map of reader back to bin.
|
||||
*/
|
||||
public List<Bin> getOverlappingBins(final GenomeLoc location) {
|
||||
if(headerMerger.getReaders().size() == 0)
|
||||
return Collections.emptyList();
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
|
||||
// All readers will have the same bin structure, so just use the first bin as an example.
|
||||
SAMFileReader2 reader = (SAMFileReader2)headerMerger.getReaders().iterator().next();
|
||||
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
return Collections.emptyList();
|
||||
|
||||
// All readers will have the same bin structure, so just use the first bin as an example.
|
||||
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
|
||||
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -73,12 +99,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A map of the file pointers bounding the bin.
|
||||
*/
|
||||
public Map<SAMFileReader2,List<Chunk>> getFilePointersBounding(Bin bin) {
|
||||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(SAMFileReader reader: headerMerger.getReaders()) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
}
|
||||
return filePointers;
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
return filePointers;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -86,12 +118,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A mapping of reader to current position.
|
||||
*/
|
||||
public Map<SAMFileReader2,Chunk> getCurrentPosition() {
|
||||
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
|
||||
for(SAMFileReader reader: headerMerger.getReaders()) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
currentPositions.put(reader2,reader2.getCurrentPosition());
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
currentPositions.put(reader2,reader2.getCurrentPosition());
|
||||
}
|
||||
return currentPositions;
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
return currentPositions;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -99,12 +137,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return Number of levels in this index.
|
||||
*/
|
||||
public int getNumIndexLevels() {
|
||||
if(headerMerger.getReaders().size() == 0)
|
||||
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)headerMerger.getReaders().iterator().next();
|
||||
return firstReader.getNumIndexLevels();
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getNumIndexLevels();
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -113,12 +157,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
if(headerMerger.getReaders().size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)headerMerger.getReaders().iterator().next();
|
||||
return firstReader.getLevelForBin(bin);
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLevelForBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -127,12 +177,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
if(headerMerger.getReaders().size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)headerMerger.getReaders().iterator().next();
|
||||
return firstReader.getFirstLocusInBin(bin);
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getFirstLocusInBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -141,12 +197,18 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
if(headerMerger.getReaders().size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)headerMerger.getReaders().iterator().next();
|
||||
return firstReader.getLastLocusInBin(bin);
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLastLocusInBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -162,7 +224,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
// know why this is. Please add a comment here if you do.
|
||||
boolean enableVerification = shard instanceof ReadShard;
|
||||
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(shard,false,enableVerification);
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(shard,enableVerification);
|
||||
while(!shard.isBufferFull() && iterator.hasNext())
|
||||
shard.addRead(iterator.next());
|
||||
|
||||
|
|
@ -185,24 +247,29 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
// Since the beginning of time for the GATK, enableVerification has been true only for ReadShards. I don't
|
||||
// know why this is. Please add a comment here if you do.
|
||||
boolean enableVerification = shard instanceof ReadShard;
|
||||
return getIterator(bamAwareShard,true,enableVerification);
|
||||
return getIterator(bamAwareShard,enableVerification);
|
||||
}
|
||||
}
|
||||
|
||||
private StingSAMIterator getIterator(BAMFormatAwareShard shard, boolean addIntervalFilter, boolean enableVerification) {
|
||||
private StingSAMIterator getIterator(BAMFormatAwareShard shard, boolean enableVerification) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
|
||||
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
|
||||
for(Map.Entry<SAMFileReader2,List<Chunk>> chunksByReader: shard.getChunks().entrySet()) {
|
||||
SAMFileReader2 reader = chunksByReader.getKey();
|
||||
readerToIteratorMap.put(reader,reader.iterator(shard.getChunks().get(reader)));
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
List<Chunk> chunks = shard.getChunks().get(reader2);
|
||||
readerToIteratorMap.put(reader2,reader2.iterator(chunks));
|
||||
}
|
||||
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true);
|
||||
|
||||
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||
if(shard.getFilter() != null)
|
||||
iterator = new FilteringIterator(iterator,shard.getFilter());
|
||||
|
||||
return applyDecoratingIterators(enableVerification,
|
||||
StingSAMIteratorAdapter.adapt(reads,iterator),
|
||||
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(reads,iterator)),
|
||||
reads.getDownsamplingFraction(),
|
||||
reads.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
reads.getSupplementalFilters());
|
||||
|
|
@ -213,7 +280,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The merged header.
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return headerMerger.getMergedHeader();
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -239,4 +306,110 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
|
||||
throw new UnsupportedOperationException("Getting read group ID from this experimental SAM reader is not currently supported.");
|
||||
}
|
||||
|
||||
private class SAMResourcePool {
|
||||
/**
|
||||
* How many entries can be cached in this resource pool?
|
||||
*/
|
||||
private final int maxEntries;
|
||||
|
||||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
private List<SAMFileReaders> allResources = new ArrayList<SAMFileReaders>();
|
||||
|
||||
/**
|
||||
* All iterators that are not currently in service.
|
||||
*/
|
||||
private List<SAMFileReaders> availableResources = new ArrayList<SAMFileReaders>();
|
||||
|
||||
public SAMResourcePool(final int maxEntries) {
|
||||
this.maxEntries = maxEntries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose a set of readers from the pool to use for this query. When complete,
|
||||
* @return
|
||||
*/
|
||||
public synchronized Collection<SAMFileReader> getAvailableReaders() {
|
||||
if(availableResources.size() == 0)
|
||||
createNewResource();
|
||||
SAMFileReaders readers = availableResources.get(0);
|
||||
availableResources.remove(readers);
|
||||
return readers;
|
||||
}
|
||||
|
||||
public synchronized void releaseReaders(Collection<SAMFileReader> readers) {
|
||||
if(!allResources.contains(readers))
|
||||
throw new StingException("Tried to return readers from the pool that didn't originate in the pool.");
|
||||
availableResources.add((SAMFileReaders)readers);
|
||||
}
|
||||
|
||||
private synchronized void createNewResource() {
|
||||
if(allResources.size() > maxEntries)
|
||||
throw new StingException("Cannot create a new resource pool. All resources are in use.");
|
||||
SAMFileReaders readers = new SAMFileReaders(reads);
|
||||
allResources.add(readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
||||
/**
|
||||
* A collection of readers derived from a reads metadata structure.
|
||||
*/
|
||||
private class SAMFileReaders extends ArrayList<SAMFileReader> {
|
||||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param sourceInfo Metadata for the reads to load.
|
||||
*/
|
||||
public SAMFileReaders(Reads sourceInfo) {
|
||||
for(File readsFile: sourceInfo.getReadsFiles()) {
|
||||
SAMFileReader2 reader = new SAMFileReader2(readsFile);
|
||||
reader.setValidationStringency(sourceInfo.getValidationStringency());
|
||||
add(reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class ReleasingIterator implements StingSAMIterator {
|
||||
/**
|
||||
* The resource acting as the source of the data.
|
||||
*/
|
||||
private final Collection<SAMFileReader> resource;
|
||||
|
||||
/**
|
||||
* The iterator to wrap.
|
||||
*/
|
||||
private final StingSAMIterator wrappedIterator;
|
||||
|
||||
public Reads getSourceInfo() {
|
||||
return wrappedIterator.getSourceInfo();
|
||||
}
|
||||
|
||||
public ReleasingIterator( Collection<SAMFileReader> resource, StingSAMIterator wrapped ) {
|
||||
this.resource = resource;
|
||||
this.wrappedIterator = wrapped;
|
||||
}
|
||||
|
||||
public ReleasingIterator iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||
}
|
||||
|
||||
public void close() {
|
||||
wrappedIterator.close();
|
||||
resourcePool.releaseReaders(resource);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import java.io.File;
|
|||
* @author hanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@Deprecated
|
||||
class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(SAMResourcePool.class);
|
||||
|
|
|
|||
Loading…
Reference in New Issue