First pass at handling SAMFileReaders using a SAMReaderID. This allows us to firewall
GATK users from the readers, which they could abuse in ways that could destabilize the GATK. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2923 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b241e0915b
commit
023654696e
|
|
@ -185,16 +185,4 @@ public class SAMFileReader2 extends SAMFileReader {
|
|||
throw new StingException("Unable to run method findIndexFile",ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null) return false;
|
||||
if(!(other instanceof SAMFileReader2)) return false;
|
||||
return this.sourceFile.equals(((SAMFileReader2)other).sourceFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return sourceFile.hashCode();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,10 +30,7 @@ import net.sf.picard.filter.SamRecordFilter;
|
|||
import net.sf.samtools.*;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
|
|
@ -344,18 +341,16 @@ public class GenomeAnalysisEngine {
|
|||
* @return
|
||||
*/
|
||||
public List<Set<String>> getSamplesByReaders() {
|
||||
|
||||
|
||||
Collection<SAMFileReader> readers = getDataSource().getReaders();
|
||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMFileReader r : readers) {
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> samples = new HashSet<String>(1);
|
||||
sample_sets.add(samples);
|
||||
|
||||
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
|
||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
||||
samples.add(g.getSample());
|
||||
}
|
||||
}
|
||||
|
|
@ -375,16 +370,16 @@ public class GenomeAnalysisEngine {
|
|||
public List<Set<String>> getLibrariesByReaders() {
|
||||
|
||||
|
||||
Collection<SAMFileReader> readers = getDataSource().getReaders();
|
||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMFileReader r : readers) {
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> libs = new HashSet<String>(2);
|
||||
lib_sets.add(libs);
|
||||
|
||||
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
|
||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
||||
libs.add(g.getLibrary());
|
||||
}
|
||||
}
|
||||
|
|
@ -393,42 +388,30 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a mapping from original input files to the SAMFileReaders
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
public Map<File, SAMFileReader> getFileToReaderMapping() {
|
||||
return getDataSource().getFileToReaderMapping();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a mapping from original input files to their (merged) read group ids
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
|
||||
Map<File, SAMFileReader> fileToReaderMap = getFileToReaderMapping();
|
||||
|
||||
// populate the file -> read group mapping
|
||||
Map<File, Set<String>> fileToReadGroupIdMap = new HashMap<File, Set<String>>();
|
||||
for (Map.Entry<File, SAMFileReader> entry : fileToReaderMap.entrySet()) {
|
||||
|
||||
for (SAMReaderID id: getDataSource().getReaderIDs()) {
|
||||
Set<String> readGroups = new HashSet<String>(5);
|
||||
|
||||
for (SAMReadGroupRecord g : entry.getValue().getFileHeader().getReadGroups()) {
|
||||
for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) {
|
||||
if (getDataSource().hasReadGroupCollisions()) {
|
||||
// Check if there were read group clashes.
|
||||
// If there were, use the SamFileHeaderMerger to translate from the
|
||||
// original read group id to the read group id in the merged stream
|
||||
readGroups.add(getDataSource().getReadGroupId(entry.getValue(), g.getReadGroupId()));
|
||||
readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId()));
|
||||
} else {
|
||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||
readGroups.add(g.getReadGroupId());
|
||||
}
|
||||
}
|
||||
|
||||
fileToReadGroupIdMap.put(entry.getKey(), readGroups);
|
||||
fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups);
|
||||
}
|
||||
|
||||
return fileToReadGroupIdMap;
|
||||
|
|
@ -447,16 +430,16 @@ public class GenomeAnalysisEngine {
|
|||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||
|
||||
|
||||
Collection<SAMFileReader> readers = getDataSource().getReaders();
|
||||
List<SAMReaderID> readers = getDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMFileReader r : readers) {
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> groups = new HashSet<String>(5);
|
||||
rg_sets.add(groups);
|
||||
|
||||
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
|
||||
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
|
||||
if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
|
||||
// use HeaderMerger to translate original read group id from the reader into the read group id in the
|
||||
// merged stream, and save that remapped read group id to associate it with specific reader
|
||||
|
|
@ -789,6 +772,15 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource.getHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the unmerged SAM file header for an individual reader.
|
||||
* @param reader The reader.
|
||||
* @return Header for that reader.
|
||||
*/
|
||||
public SAMFileHeader getSAMFileHeader(SAMReaderID reader) {
|
||||
return readsDataSource.getHeader(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns data source object encapsulating all essential info and handlers used to traverse
|
||||
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ import java.io.File;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class BlockDrivenSAMDataSource extends SAMDataSource {
|
||||
|
||||
/**
|
||||
* A collection of readers driving the merging process.
|
||||
*/
|
||||
|
|
@ -32,7 +31,17 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
/**
|
||||
* The merged header.
|
||||
*/
|
||||
private final SAMFileHeader header;
|
||||
private final SAMFileHeader mergedHeader;
|
||||
|
||||
/**
|
||||
* Whether the read groups in overlapping files collide.
|
||||
*/
|
||||
private final boolean hasReadGroupCollisions;
|
||||
|
||||
/**
|
||||
* Maps the SAM readers' original read group ids to their revised ids.
|
||||
*/
|
||||
private final Map<SAMReaderID,ReadGroupMapping> mergedReadGroupMappings = new HashMap<SAMReaderID,ReadGroupMapping>();
|
||||
|
||||
/**
|
||||
* Create a new block-aware SAM data source given the supplied read metadata.
|
||||
|
|
@ -44,32 +53,36 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
|
||||
|
||||
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
header = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true).getMergedHeader();
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
|
||||
mergedHeader = headerMerger.getMergedHeader();
|
||||
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
|
||||
|
||||
for(SAMReaderID id: readerIDs) {
|
||||
SAMFileReader reader = readers.getReader(id);
|
||||
ReadGroupMapping mapping = new ReadGroupMapping();
|
||||
|
||||
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
||||
for(SAMReadGroupRecord readGroup: readGroups)
|
||||
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
||||
|
||||
mergedReadGroupMappings.put(id,mapping);
|
||||
}
|
||||
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
|
||||
public boolean hasIndex() {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
return hasIndex(readers);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Report whether a given collection of SAM file readers is indexed.
|
||||
* @param readers The collection of readers.
|
||||
* @return True if the given collection of readers is indexed.
|
||||
* True if all readers have an index.
|
||||
* @return
|
||||
*/
|
||||
private boolean hasIndex(Collection<SAMFileReader> readers) {
|
||||
for(SAMFileReader reader: readers) {
|
||||
public boolean hasIndex() {
|
||||
for(SAMFileReader reader: resourcePool.getReadersWithoutLocking()) {
|
||||
if(!reader.hasIndex())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -78,19 +91,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A map of reader back to bin.
|
||||
*/
|
||||
public List<Bin> getOverlappingBins(final GenomeLoc location) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
return Collections.emptyList();
|
||||
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
return Collections.emptyList();
|
||||
|
||||
// All readers will have the same bin structure, so just use the first bin as an example.
|
||||
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
|
||||
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
// All readers will have the same bin structure, so just use the first bin as an example.
|
||||
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
|
||||
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -99,18 +106,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A map of the file pointers bounding the bin.
|
||||
*/
|
||||
public Map<SAMFileReader2,List<Chunk>> getFilePointersBounding(Bin bin) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
}
|
||||
return filePointers;
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
|
||||
}
|
||||
return filePointers;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -118,18 +120,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return A mapping of reader to current position.
|
||||
*/
|
||||
public Map<SAMFileReader2,Chunk> getCurrentPosition() {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
currentPositions.put(reader2,reader2.getCurrentPosition());
|
||||
}
|
||||
return currentPositions;
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
SAMFileReader2 reader2 = (SAMFileReader2)reader;
|
||||
currentPositions.put(reader2,reader2.getCurrentPosition());
|
||||
}
|
||||
return currentPositions;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -137,18 +134,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return Number of levels in this index.
|
||||
*/
|
||||
public int getNumIndexLevels() {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getNumIndexLevels();
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getNumIndexLevels();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -157,18 +149,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return the level associated with the given bin number.
|
||||
*/
|
||||
public int getLevelForBin(final Bin bin) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLevelForBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLevelForBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -177,18 +164,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getFirstLocusInBin(final Bin bin) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getFirstLocusInBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getFirstLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -197,18 +179,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The last position that the given bin can represent.
|
||||
*/
|
||||
public int getLastLocusInBin(final Bin bin) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
try {
|
||||
if(readers.size() == 0)
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex(readers))
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLastLocusInBin(bin);
|
||||
}
|
||||
finally {
|
||||
resourcePool.releaseReaders(readers);
|
||||
}
|
||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||
if(readers.isEmpty())
|
||||
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
|
||||
if(!hasIndex())
|
||||
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
|
||||
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
|
||||
return firstReader.getLastLocusInBin(bin);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -252,7 +229,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
}
|
||||
|
||||
private StingSAMIterator getIterator(BAMFormatAwareShard shard, boolean enableVerification) {
|
||||
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
|
||||
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
|
||||
for(SAMFileReader reader: readers) {
|
||||
|
|
@ -261,7 +238,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
readerToIteratorMap.put(reader2,reader2.iterator(chunks));
|
||||
}
|
||||
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true);
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
|
||||
|
||||
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
|
||||
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
|
||||
|
|
@ -280,15 +257,11 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return The merged header.
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return header;
|
||||
return mergedHeader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently unsupported.
|
||||
* @return
|
||||
*/
|
||||
public Collection<SAMFileReader> getReaders() {
|
||||
throw new StingException("Currently unable to get readers for shard-based fields.");
|
||||
public SAMFileHeader getHeader(SAMReaderID id) {
|
||||
return resourcePool.getReadersWithoutLocking().getReader(id).getFileHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -296,15 +269,15 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
* @return False always.
|
||||
*/
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return false;
|
||||
return hasReadGroupCollisions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently unsupported.
|
||||
* @return
|
||||
* Gets the revised read group id mapped to this 'original' read group id.
|
||||
* @return Merged read group ID.
|
||||
*/
|
||||
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
|
||||
throw new UnsupportedOperationException("Getting read group ID from this experimental SAM reader is not currently supported.");
|
||||
public String getReadGroupId(final SAMReaderID reader, final String originalReadGroupId) {
|
||||
return mergedReadGroupMappings.get(reader).get(originalReadGroupId);
|
||||
}
|
||||
|
||||
private class SAMResourcePool {
|
||||
|
|
@ -316,66 +289,130 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
/**
|
||||
* All iterators of this reference-ordered data.
|
||||
*/
|
||||
private List<SAMFileReaders> allResources = new ArrayList<SAMFileReaders>();
|
||||
private List<SAMReaders> allResources = new ArrayList<SAMReaders>();
|
||||
|
||||
/**
|
||||
* All iterators that are not currently in service.
|
||||
*/
|
||||
private List<SAMFileReaders> availableResources = new ArrayList<SAMFileReaders>();
|
||||
private List<SAMReaders> availableResources = new ArrayList<SAMReaders>();
|
||||
|
||||
public SAMResourcePool(final int maxEntries) {
|
||||
this.maxEntries = maxEntries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dangerous internal method; retrieves any set of readers, whether in iteration or not.
|
||||
* Used to handle non-exclusive, stateless operations, such as index queries.
|
||||
* @return Any collection of SAMReaders, whether in iteration or not.
|
||||
*/
|
||||
protected SAMReaders getReadersWithoutLocking() {
|
||||
synchronized(this) {
|
||||
if(allResources.size() == 0)
|
||||
createNewResource();
|
||||
}
|
||||
return allResources.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose a set of readers from the pool to use for this query. When complete,
|
||||
* @return
|
||||
*/
|
||||
public synchronized Collection<SAMFileReader> getAvailableReaders() {
|
||||
public synchronized SAMReaders getAvailableReaders() {
|
||||
if(availableResources.size() == 0)
|
||||
createNewResource();
|
||||
SAMFileReaders readers = availableResources.get(0);
|
||||
SAMReaders readers = availableResources.get(0);
|
||||
availableResources.remove(readers);
|
||||
return readers;
|
||||
}
|
||||
|
||||
public synchronized void releaseReaders(Collection<SAMFileReader> readers) {
|
||||
public synchronized void releaseReaders(SAMReaders readers) {
|
||||
if(!allResources.contains(readers))
|
||||
throw new StingException("Tried to return readers from the pool that didn't originate in the pool.");
|
||||
availableResources.add((SAMFileReaders)readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
||||
private synchronized void createNewResource() {
|
||||
if(allResources.size() > maxEntries)
|
||||
throw new StingException("Cannot create a new resource pool. All resources are in use.");
|
||||
SAMFileReaders readers = new SAMFileReaders(reads);
|
||||
SAMReaders readers = new SAMReaders(reads);
|
||||
allResources.add(readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A collection of readers derived from a reads metadata structure.
|
||||
*/
|
||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
||||
/**
|
||||
* A collection of readers derived from a reads metadata structure.
|
||||
* Internal storage for a map of id -> reader.
|
||||
*/
|
||||
private class SAMFileReaders extends ArrayList<SAMFileReader> {
|
||||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param sourceInfo Metadata for the reads to load.
|
||||
*/
|
||||
public SAMFileReaders(Reads sourceInfo) {
|
||||
for(File readsFile: sourceInfo.getReadsFiles()) {
|
||||
SAMFileReader2 reader = new SAMFileReader2(readsFile);
|
||||
reader.setValidationStringency(sourceInfo.getValidationStringency());
|
||||
add(reader);
|
||||
}
|
||||
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
|
||||
|
||||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param sourceInfo Metadata for the reads to load.
|
||||
*/
|
||||
/**
 * Derive a new set of readers from the Reads metadata.
 * @param sourceInfo Metadata for the reads to load.
 */
public SAMReaders(Reads sourceInfo) {
    for(File bamFile: sourceInfo.getReadsFiles()) {
        // One SAMFileReader2 per input file, keyed by its SAMReaderID.
        SAMFileReader2 fileReader = new SAMFileReader2(bamFile);
        fileReader.setValidationStringency(sourceInfo.getValidationStringency());
        readers.put(new SAMReaderID(bamFile),fileReader);
    }
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve the reader from the data structure.
|
||||
* @param id The ID of the reader to retrieve.
|
||||
*/
|
||||
public SAMFileReader getReader(SAMReaderID id) {
|
||||
if(!readers.containsKey(id))
|
||||
throw new NoSuchElementException("No reader is associated with id " + id);
|
||||
return readers.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method to get the header associated with an individual ID.
|
||||
* @param id ID for which to retrieve the header.
|
||||
* @return Header for this SAM file.
|
||||
*/
|
||||
public SAMFileHeader getHeader(SAMReaderID id) {
|
||||
if(!readers.containsKey(id))
|
||||
throw new NoSuchElementException("No reader is associated with id " + id);
|
||||
return readers.get(id).getFileHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over all readers in this structure.
|
||||
* @return An iterator over readers.
|
||||
*/
|
||||
public Iterator<SAMFileReader> iterator() {
|
||||
return readers.values().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether any readers are present in this structure.
|
||||
* @return
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return readers.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the actual readers out of this data structure.
|
||||
* @return A collection of the readers.
|
||||
*/
|
||||
public Collection<SAMFileReader> values() {
|
||||
return readers.values();
|
||||
}
|
||||
}
|
||||
|
||||
private class ReleasingIterator implements StingSAMIterator {
|
||||
/**
|
||||
* The resource acting as the source of the data.
|
||||
*/
|
||||
private final Collection<SAMFileReader> resource;
|
||||
private final SAMReaders resource;
|
||||
|
||||
/**
|
||||
* The iterator to wrap.
|
||||
|
|
@ -386,7 +423,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
return wrappedIterator.getSourceInfo();
|
||||
}
|
||||
|
||||
public ReleasingIterator( Collection<SAMFileReader> resource, StingSAMIterator wrapped ) {
|
||||
/**
 * Wraps an iterator so the reader set can be returned to the pool on close.
 * @param resource Reader set acting as the source of the data.
 * @param wrapped Underlying iterator to delegate to.
 */
public ReleasingIterator(SAMReaders resource, StingSAMIterator wrapped) {
    this.resource = resource;
    this.wrappedIterator = wrapped;
}
|
||||
|
|
@ -412,4 +449,9 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
|
|||
return wrappedIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Maps read groups in the original SAMFileReaders to read groups in
 * the merged stream — presumably the ids produced by the header merger
 * (see mergedReadGroupMappings usage); TODO confirm. Keys are original
 * read group ids, values are their remapped counterparts.
 */
private class ReadGroupMapping extends HashMap<String,String> {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
|
||||
|
|
@ -17,9 +16,6 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -112,13 +108,8 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
return resourcePool.getHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a mapping from original input files to the SAMFileReaders
|
||||
*
|
||||
* @return the mapping
|
||||
*/
|
||||
public Map<File, SAMFileReader> getFileToReaderMapping() {
|
||||
return resourcePool.getFileToReaderMapping();
|
||||
public SAMFileHeader getHeader(SAMReaderID id) {
|
||||
return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -128,21 +119,14 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
|
|||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
/**
|
||||
* Returns header merger: a class that keeps the mapping between original read groups and read groups
|
||||
* of the merged stream; merger also provides access to the individual file readers (and hence headers
|
||||
* prior to the merging too) maintained by the system.
|
||||
* @return
|
||||
*/
|
||||
public Collection<SAMFileReader> getReaders() { return resourcePool.getHeaderMerger().getReaders(); }
|
||||
|
||||
/** Returns true if there are read group duplicates within the merged headers. */
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return resourcePool.getHeaderMerger().hasReadGroupCollisions();
|
||||
}
|
||||
|
||||
/** Returns the read group id that should be used for the input read and RG id. */
|
||||
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
|
||||
public String getReadGroupId(final SAMReaderID id, final String originalReadGroupId) {
|
||||
SAMFileReader reader = resourcePool.getFileToReaderMapping().get(id.samFile);
|
||||
return resourcePool.getHeaderMerger().getReadGroupId(reader,originalReadGroupId);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
|
|
@ -14,6 +13,8 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
|||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -48,10 +49,14 @@ import java.util.Map;
|
|||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public abstract class SAMDataSource implements SimpleDataSource {
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final Reads reads;
|
||||
|
||||
/**
|
||||
* Identifiers for the readers driving this data source.
|
||||
*/
|
||||
protected final List<SAMReaderID> readerIDs = new ArrayList<SAMReaderID>();
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||
|
||||
|
|
@ -87,6 +92,7 @@ public abstract class SAMDataSource implements SimpleDataSource {
|
|||
if (!smFile.canRead()) {
|
||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||
}
|
||||
readerIDs.add(new SAMReaderID(smFile));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +110,12 @@ public abstract class SAMDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public abstract SAMFileHeader getHeader();
|
||||
|
||||
/**
|
||||
* Gets the (unmerged) header for the given reader.
|
||||
* @param reader Unique identifier for the reader.
|
||||
* @return Unmerged header.
|
||||
*/
|
||||
public abstract SAMFileHeader getHeader(SAMReaderID reader);
|
||||
|
||||
/**
|
||||
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
|
||||
|
|
@ -112,23 +124,27 @@ public abstract class SAMDataSource implements SimpleDataSource {
|
|||
*/
|
||||
public Reads getReadsInfo() { return reads; }
|
||||
|
||||
    /**
     * Returns a mapping from each input SAM/BAM file to the reader servicing it.
     * This base implementation maintains no such mapping and returns null;
     * presumably subclasses that track per-file readers override it — TODO confirm.
     * NOTE(review): returning null forces every caller to null-check; consider
     * an empty map instead, once it is confirmed no caller distinguishes null.
     * @return the file-to-reader mapping, or null if none is maintained.
     */
    public Map<File, SAMFileReader> getFileToReaderMapping() { return null; }
|
||||
|
||||
/**
|
||||
* Returns identifiers for the readers used by this data source.
|
||||
*/
|
||||
public abstract Collection<SAMFileReader> getReaders();
|
||||
    /**
     * Returns the identifiers of the readers driving this data source,
     * in the order they were registered.
     * NOTE(review): this exposes the internal mutable list directly; callers
     * must not modify it — consider returning an unmodifiable view.
     * @return live list of reader IDs backing this data source.
     */
    public List<SAMReaderID> getReaderIDs() {
        return readerIDs;
    }
|
||||
|
||||
    /**
     * Gets the SAM file associated with a given reader ID.
     * SAMReaderID currently identifies a reader solely by its source file,
     * so this is a direct lookup with no I/O.
     * @param id The reader for which to retrieve the source file.
     * @return the file actually associated with the id.
     */
    public File getSAMFile(SAMReaderID id) {
        return id.samFile;
    }
|
||||
|
||||
/** Returns true if there are read group duplicates within the merged headers. */
|
||||
public abstract boolean hasReadGroupCollisions();
|
||||
|
||||
/** Returns the read group id that should be used for the input read and RG id. */
|
||||
public abstract String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId);
|
||||
public abstract String getReadGroupId(final SAMReaderID reader, final String originalReadGroupId);
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Uniquely identifies a SAM file reader.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class SAMReaderID {
|
||||
/**
|
||||
* The SAM file at the heart of this reader. SAMReaderID
|
||||
* currently supports only file-based readers.
|
||||
*/
|
||||
protected final File samFile;
|
||||
|
||||
/**
|
||||
* Creates an identifier for a SAM file based on read.
|
||||
* @param samFile The source file for SAM data.
|
||||
*/
|
||||
protected SAMReaderID(File samFile) {
|
||||
this.samFile = samFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two IDs to see whether they're equal.
|
||||
* @param other The other identifier.
|
||||
* @return True iff the two readers point to the same file.
|
||||
*/
|
||||
public boolean equals(Object other) {
|
||||
if(other == null) return false;
|
||||
if(!(other instanceof SAMReaderID)) return false;
|
||||
|
||||
SAMReaderID otherID = (SAMReaderID)other;
|
||||
return this.samFile.equals(otherID.samFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a hash code for this object.
|
||||
* @return A hash code, based solely on the file name at this point.
|
||||
*/
|
||||
public int hashCode() {
|
||||
return samFile.hashCode();
|
||||
}
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
|
@ -127,9 +128,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
|
||||
if ( NWAY_OUTPUT ) {
|
||||
Map<File, SAMFileReader> readerMap = getToolkit().getFileToReaderMapping();
|
||||
for ( File file : readerMap.keySet() ) {
|
||||
SAMFileHeader header = readerMap.get(file).getFileHeader();
|
||||
List<SAMReaderID> ids = getToolkit().getDataSource().getReaderIDs();
|
||||
for ( SAMReaderID id: ids ) {
|
||||
File file = getToolkit().getDataSource().getSAMFile(id);
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader(id);
|
||||
if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT )
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
String newFileName = file.getName().substring(0, file.getName().length()-3) + outputSuffix + ".bam";
|
||||
|
|
|
|||
Loading…
Reference in New Issue