First pass at handling SAMFileReaders using a SAMReaderID. This allows us to firewall
GATK users from the readers, which they could abuse in ways that could destabilize the GATK.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2923 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-04 00:59:32 +00:00
parent b241e0915b
commit 023654696e
7 changed files with 285 additions and 215 deletions

View File

@ -185,16 +185,4 @@ public class SAMFileReader2 extends SAMFileReader {
throw new StingException("Unable to run method findIndexFile",ex);
}
}
@Override
public boolean equals(Object other) {
// Two readers are equal iff they wrap the same source file.
// (null is never an instance of SAMFileReader2, so no explicit null check is needed.)
if(!(other instanceof SAMFileReader2)) return false;
SAMFileReader2 that = (SAMFileReader2)other;
return sourceFile.equals(that.sourceFile);
}
/**
 * Generates a hash code consistent with equals(): derived solely from the
 * underlying source file.
 * @return Hash code of the source file backing this reader.
 */
@Override
public int hashCode() {
return sourceFile.hashCode();
}
}

View File

@ -30,10 +30,7 @@ import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.IndexDrivenSAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.*;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
@ -344,18 +341,16 @@ public class GenomeAnalysisEngine {
* @return
*/
public List<Set<String>> getSamplesByReaders() {
Collection<SAMFileReader> readers = getDataSource().getReaders();
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
for (SAMFileReader r : readers) {
for (SAMReaderID r : readers) {
Set<String> samples = new HashSet<String>(1);
sample_sets.add(samples);
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
samples.add(g.getSample());
}
}
@ -375,16 +370,16 @@ public class GenomeAnalysisEngine {
public List<Set<String>> getLibrariesByReaders() {
Collection<SAMFileReader> readers = getDataSource().getReaders();
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
for (SAMFileReader r : readers) {
for (SAMReaderID r : readers) {
Set<String> libs = new HashSet<String>(2);
lib_sets.add(libs);
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
libs.add(g.getLibrary());
}
}
@ -393,42 +388,30 @@ public class GenomeAnalysisEngine {
}
/**
* Returns a mapping from original input files to the SAMFileReaders
*
* @return the mapping
*/
public Map<File, SAMFileReader> getFileToReaderMapping() {
return getDataSource().getFileToReaderMapping();
}
/**
* Returns a mapping from original input files to their (merged) read group ids
*
* @return the mapping
*/
public Map<File, Set<String>> getFileToReadGroupIdMapping() {
Map<File, SAMFileReader> fileToReaderMap = getFileToReaderMapping();
// populate the file -> read group mapping
Map<File, Set<String>> fileToReadGroupIdMap = new HashMap<File, Set<String>>();
for (Map.Entry<File, SAMFileReader> entry : fileToReaderMap.entrySet()) {
for (SAMReaderID id: getDataSource().getReaderIDs()) {
Set<String> readGroups = new HashSet<String>(5);
for (SAMReadGroupRecord g : entry.getValue().getFileHeader().getReadGroups()) {
for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) {
if (getDataSource().hasReadGroupCollisions()) {
// Check if there were read group clashes.
// If there were, use the SamFileHeaderMerger to translate from the
// original read group id to the read group id in the merged stream
readGroups.add(getDataSource().getReadGroupId(entry.getValue(), g.getReadGroupId()));
readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
readGroups.add(g.getReadGroupId());
}
}
fileToReadGroupIdMap.put(entry.getKey(), readGroups);
fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups);
}
return fileToReadGroupIdMap;
@ -447,16 +430,16 @@ public class GenomeAnalysisEngine {
public List<Set<String>> getMergedReadGroupsByReaders() {
Collection<SAMFileReader> readers = getDataSource().getReaders();
List<SAMReaderID> readers = getDataSource().getReaderIDs();
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
for (SAMFileReader r : readers) {
for (SAMReaderID r : readers) {
Set<String> groups = new HashSet<String>(5);
rg_sets.add(groups);
for (SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) {
if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
// use HeaderMerger to translate original read group id from the reader into the read group id in the
// merged stream, and save that remapped read group id to associate it with specific reader
@ -789,6 +772,15 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader();
}
/**
 * Returns the unmerged SAM file header for an individual reader, delegating
 * the lookup to the reads data source.
 * @param reader ID of the reader whose header should be retrieved.
 * @return Header for that reader, prior to any merging.
 */
public SAMFileHeader getSAMFileHeader(SAMReaderID reader) {
return readsDataSource.getHeader(reader);
}
/**
* Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object.

View File

@ -23,7 +23,6 @@ import java.io.File;
* @version 0.1
*/
public class BlockDrivenSAMDataSource extends SAMDataSource {
/**
* A collection of readers driving the merging process.
*/
@ -32,7 +31,17 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
/**
* The merged header.
*/
private final SAMFileHeader header;
private final SAMFileHeader mergedHeader;
/**
* Whether the read groups in overlapping files collide.
*/
private final boolean hasReadGroupCollisions;
/**
* Maps the SAM readers' original read group ids to their revised ids.
*/
private final Map<SAMReaderID,ReadGroupMapping> mergedReadGroupMappings = new HashMap<SAMReaderID,ReadGroupMapping>();
/**
* Create a new block-aware SAM data source given the supplied read metadata.
@ -44,32 +53,36 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
logger.warn("Experimental sharding is enabled. Many use cases are not supported. Please use with care.");
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
header = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true).getMergedHeader();
SAMReaders readers = resourcePool.getAvailableReaders();
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
mergedHeader = headerMerger.getMergedHeader();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
for(SAMReaderID id: readerIDs) {
SAMFileReader reader = readers.getReader(id);
ReadGroupMapping mapping = new ReadGroupMapping();
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups)
mapping.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
mergedReadGroupMappings.put(id,mapping);
}
resourcePool.releaseReaders(readers);
}
public boolean hasIndex() {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
return hasIndex(readers);
}
finally {
resourcePool.releaseReaders(readers);
}
}
/**
* Report whether a given collection of SAM file readers is indexed.
* @param readers The collection of readers.
* @return True if the given collection of readers is indexed.
* @return True if all readers have an index.
*/
private boolean hasIndex(Collection<SAMFileReader> readers) {
for(SAMFileReader reader: readers) {
public boolean hasIndex() {
for(SAMFileReader reader: resourcePool.getReadersWithoutLocking()) {
if(!reader.hasIndex())
return false;
}
return true;
return true;
}
/**
@ -78,19 +91,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return A map of reader back to bin.
*/
public List<Bin> getOverlappingBins(final GenomeLoc location) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
SAMReaders readers = resourcePool.getReadersWithoutLocking();
if(readers.isEmpty())
return Collections.emptyList();
try {
if(readers.size() == 0)
return Collections.emptyList();
// All readers will have the same bin structure, so just use the first bin as an example.
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
}
finally {
resourcePool.releaseReaders(readers);
}
// All readers will have the same bin structure, so just use the first bin as an example.
SAMFileReader2 reader = (SAMFileReader2)readers.iterator().next();
return reader.getOverlappingBins(location.getContig(),(int)location.getStart(),(int)location.getStop());
}
/**
@ -99,18 +106,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return A map of the file pointers bounding the bin.
*/
public Map<SAMFileReader2,List<Chunk>> getFilePointersBounding(Bin bin) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader;
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
}
return filePointers;
}
finally {
resourcePool.releaseReaders(readers);
SAMReaders readers = resourcePool.getReadersWithoutLocking();
Map<SAMFileReader2,List<Chunk>> filePointers = new HashMap<SAMFileReader2,List<Chunk>>();
for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader;
filePointers.put(reader2,reader2.getFilePointersBounding(bin));
}
return filePointers;
}
/**
@ -118,18 +120,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return A mapping of reader to current position.
*/
public Map<SAMFileReader2,Chunk> getCurrentPosition() {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader;
currentPositions.put(reader2,reader2.getCurrentPosition());
}
return currentPositions;
}
finally {
resourcePool.releaseReaders(readers);
SAMReaders readers = resourcePool.getReadersWithoutLocking();
Map<SAMFileReader2,Chunk> currentPositions = new HashMap<SAMFileReader2,Chunk>();
for(SAMFileReader reader: readers) {
SAMFileReader2 reader2 = (SAMFileReader2)reader;
currentPositions.put(reader2,reader2.getCurrentPosition());
}
return currentPositions;
}
/**
@ -137,18 +134,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return Number of levels in this index.
*/
public int getNumIndexLevels() {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
if(readers.size() == 0)
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
if(!hasIndex(readers))
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getNumIndexLevels();
}
finally {
resourcePool.releaseReaders(readers);
}
SAMReaders readers = resourcePool.getReadersWithoutLocking();
if(readers.isEmpty())
throw new StingException("Unable to determine number of index levels; no BAMs are present.");
if(!hasIndex())
throw new SAMException("Unable to determine number of index levels; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getNumIndexLevels();
}
/**
@ -157,18 +149,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return the level associated with the given bin number.
*/
public int getLevelForBin(final Bin bin) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
if(readers.size() == 0)
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex(readers))
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getLevelForBin(bin);
}
finally {
resourcePool.releaseReaders(readers);
}
SAMReaders readers = resourcePool.getReadersWithoutLocking();
if(readers.isEmpty())
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex())
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getLevelForBin(bin);
}
/**
@ -177,18 +164,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return The last position that the given bin can represent.
*/
public int getFirstLocusInBin(final Bin bin) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
if(readers.size() == 0)
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex(readers))
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getFirstLocusInBin(bin);
}
finally {
resourcePool.releaseReaders(readers);
}
SAMReaders readers = resourcePool.getReadersWithoutLocking();
if(readers.isEmpty())
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex())
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getFirstLocusInBin(bin);
}
/**
@ -197,18 +179,13 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return The last position that the given bin can represent.
*/
public int getLastLocusInBin(final Bin bin) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
try {
if(readers.size() == 0)
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex(readers))
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getLastLocusInBin(bin);
}
finally {
resourcePool.releaseReaders(readers);
}
SAMReaders readers = resourcePool.getReadersWithoutLocking();
if(readers.isEmpty())
throw new StingException("Unable to determine number of level for bin; no BAMs are present.");
if(!hasIndex())
throw new SAMException("Unable to determine number of level for bin; BAM file index is not present.");
SAMFileReader2 firstReader = (SAMFileReader2)readers.iterator().next();
return firstReader.getLastLocusInBin(bin);
}
/**
@ -252,7 +229,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
}
private StingSAMIterator getIterator(BAMFormatAwareShard shard, boolean enableVerification) {
Collection<SAMFileReader> readers = resourcePool.getAvailableReaders();
SAMReaders readers = resourcePool.getAvailableReaders();
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
for(SAMFileReader reader: readers) {
@ -261,7 +238,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
readerToIteratorMap.put(reader2,reader2.iterator(chunks));
}
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers,SAMFileHeader.SortOrder.coordinate,true);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
CloseableIterator<SAMRecord> iterator = new MergingSamRecordIterator(headerMerger,readerToIteratorMap,true);
@ -280,15 +257,11 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return The merged header.
*/
public SAMFileHeader getHeader() {
return header;
return mergedHeader;
}
/**
* Currently unsupported.
* @return
*/
public Collection<SAMFileReader> getReaders() {
throw new StingException("Currently unable to get readers for shard-based fields.");
public SAMFileHeader getHeader(SAMReaderID id) {
return resourcePool.getReadersWithoutLocking().getReader(id).getFileHeader();
}
/**
@ -296,15 +269,15 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
* @return False always.
*/
public boolean hasReadGroupCollisions() {
return false;
return hasReadGroupCollisions;
}
/**
* Currently unsupported.
* @return
* Gets the revised read group id mapped to this 'original' read group id.
* @return Merged read group ID.
*/
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
throw new UnsupportedOperationException("Getting read group ID from this experimental SAM reader is not currently supported.");
public String getReadGroupId(final SAMReaderID reader, final String originalReadGroupId) {
return mergedReadGroupMappings.get(reader).get(originalReadGroupId);
}
private class SAMResourcePool {
@ -316,66 +289,130 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
/**
* All iterators of this reference-ordered data.
*/
private List<SAMFileReaders> allResources = new ArrayList<SAMFileReaders>();
private List<SAMReaders> allResources = new ArrayList<SAMReaders>();
/**
* All iterators that are not currently in service.
*/
private List<SAMFileReaders> availableResources = new ArrayList<SAMFileReaders>();
private List<SAMReaders> availableResources = new ArrayList<SAMReaders>();
public SAMResourcePool(final int maxEntries) {
this.maxEntries = maxEntries;
}
/**
* Dangerous internal method; retrieves any set of readers, whether in iteration or not.
* Used to handle non-exclusive, stateless operations, such as index queries.
* @return Any collection of SAMReaders, whether in iteration or not.
*/
protected SAMReaders getReadersWithoutLocking() {
synchronized(this) {
if(allResources.size() == 0)
createNewResource();
}
return allResources.get(0);
}
/**
* Choose a set of readers from the pool to use for this query. When complete,
* release the set back to the pool via releaseReaders().
* @return An available set of readers, removed from the available pool.
*/
public synchronized Collection<SAMFileReader> getAvailableReaders() {
public synchronized SAMReaders getAvailableReaders() {
if(availableResources.size() == 0)
createNewResource();
SAMFileReaders readers = availableResources.get(0);
SAMReaders readers = availableResources.get(0);
availableResources.remove(readers);
return readers;
}
public synchronized void releaseReaders(Collection<SAMFileReader> readers) {
public synchronized void releaseReaders(SAMReaders readers) {
if(!allResources.contains(readers))
throw new StingException("Tried to return readers from the pool that didn't originate in the pool.");
availableResources.add((SAMFileReaders)readers);
availableResources.add(readers);
}
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new StingException("Cannot create a new resource pool. All resources are in use.");
SAMFileReaders readers = new SAMFileReaders(reads);
SAMReaders readers = new SAMReaders(reads);
allResources.add(readers);
availableResources.add(readers);
}
}
/**
* A collection of readers derived from a reads metadata structure.
*/
private class SAMReaders implements Iterable<SAMFileReader> {
/**
* A collection of readers derived from a reads metadata structure.
* Internal storage for a map of id -> reader.
*/
private class SAMFileReaders extends ArrayList<SAMFileReader> {
/**
* Derive a new set of readers from the Reads metadata.
* @param sourceInfo Metadata for the reads to load.
*/
public SAMFileReaders(Reads sourceInfo) {
for(File readsFile: sourceInfo.getReadsFiles()) {
SAMFileReader2 reader = new SAMFileReader2(readsFile);
reader.setValidationStringency(sourceInfo.getValidationStringency());
add(reader);
}
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
/**
* Derive a new set of readers from the Reads metadata.
* @param sourceInfo Metadata for the reads to load.
*/
public SAMReaders(Reads sourceInfo) {
for(File readsFile: sourceInfo.getReadsFiles()) {
SAMFileReader2 reader = new SAMFileReader2(readsFile);
reader.setValidationStringency(sourceInfo.getValidationStringency());
readers.put(new SAMReaderID(readsFile),reader);
}
}
}
/**
* Retrieve the reader from the data structure.
* @param id The ID of the reader to retrieve.
*/
public SAMFileReader getReader(SAMReaderID id) {
if(!readers.containsKey(id))
throw new NoSuchElementException("No reader is associated with id " + id);
return readers.get(id);
}
/**
* Convenience method to get the header associated with an individual ID.
* @param id ID for which to retrieve the header.
* @return Header for this SAM file.
*/
public SAMFileHeader getHeader(SAMReaderID id) {
if(!readers.containsKey(id))
throw new NoSuchElementException("No reader is associated with id " + id);
return readers.get(id).getFileHeader();
}
/**
* Returns an iterator over all readers in this structure.
* @return An iterator over readers.
*/
public Iterator<SAMFileReader> iterator() {
return readers.values().iterator();
}
/**
* Returns whether any readers are present in this structure.
* @return
*/
public boolean isEmpty() {
return readers.isEmpty();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileReader> values() {
return readers.values();
}
}
private class ReleasingIterator implements StingSAMIterator {
/**
* The resource acting as the source of the data.
*/
private final Collection<SAMFileReader> resource;
private final SAMReaders resource;
/**
* The iterator to wrap.
@ -386,7 +423,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
return wrappedIterator.getSourceInfo();
}
public ReleasingIterator( Collection<SAMFileReader> resource, StingSAMIterator wrapped ) {
public ReleasingIterator(SAMReaders resource, StingSAMIterator wrapped) {
this.resource = resource;
this.wrappedIterator = wrapped;
}
@ -412,4 +449,9 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
return wrappedIterator.next();
}
}
/**
 * Maps read groups in the original SAMFileReaders to read groups in
 * the merged header (original read group id -> merged read group id).
 */
private class ReadGroupMapping extends HashMap<String,String> {}
}

View File

@ -4,7 +4,6 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.sam.SamFileHeaderMerger;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
@ -17,9 +16,6 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.util.*;
import java.io.File;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -112,13 +108,8 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
return resourcePool.getHeader();
}
/**
* Returns a mapping from original input files to the SAMFileReaders
*
* @return the mapping
*/
public Map<File, SAMFileReader> getFileToReaderMapping() {
return resourcePool.getFileToReaderMapping();
public SAMFileHeader getHeader(SAMReaderID id) {
return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader();
}
/**
@ -128,21 +119,14 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
*/
public Reads getReadsInfo() { return reads; }
/**
* Returns header merger: a class that keeps the mapping between original read groups and read groups
* of the merged stream; merger also provides access to the individual file readers (and hence headers
* prior to the merging too) maintained by the system.
* @return
*/
public Collection<SAMFileReader> getReaders() { return resourcePool.getHeaderMerger().getReaders(); }
/** Returns true if there are read group duplicates within the merged headers. */
public boolean hasReadGroupCollisions() {
return resourcePool.getHeaderMerger().hasReadGroupCollisions();
}
/** Returns the read group id that should be used for the input read and RG id. */
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
public String getReadGroupId(final SAMReaderID id, final String originalReadGroupId) {
SAMFileReader reader = resourcePool.getFileToReaderMapping().get(id.samFile);
return resourcePool.getHeaderMerger().getReadGroupId(reader,originalReadGroupId);
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
@ -14,6 +13,8 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;
import java.util.Collection;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
/*
* Copyright (c) 2009 The Broad Institute
@ -48,10 +49,14 @@ import java.util.Map;
* Converts shards to SAM iterators over the specified region
*/
public abstract class SAMDataSource implements SimpleDataSource {
/** Backing support for reads. */
protected final Reads reads;
/**
* Identifiers for the readers driving this data source.
*/
protected final List<SAMReaderID> readerIDs = new ArrayList<SAMReaderID>();
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
@ -87,6 +92,7 @@ public abstract class SAMDataSource implements SimpleDataSource {
if (!smFile.canRead()) {
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
}
readerIDs.add(new SAMReaderID(smFile));
}
}
@ -104,6 +110,12 @@ public abstract class SAMDataSource implements SimpleDataSource {
*/
public abstract SAMFileHeader getHeader();
/**
* Gets the (unmerged) header for the given reader.
* @param reader Unique identifier for the reader.
* @return Unmerged header.
*/
public abstract SAMFileHeader getHeader(SAMReaderID reader);
/**
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
@ -112,23 +124,27 @@ public abstract class SAMDataSource implements SimpleDataSource {
*/
public Reads getReadsInfo() { return reads; }
/**
* Returns a mapping from original input files to their (merged) read group ids
*
* @return the mapping
*/
public Map<File, SAMFileReader> getFileToReaderMapping() { return null; }
/**
* Returns readers used by this data source.
*/
public abstract Collection<SAMFileReader> getReaders();
/**
 * Gets the identifiers of all readers driving this data source, in the order
 * the readers were registered.
 * NOTE(review): this returns the internal mutable list directly; callers
 * should treat it as read-only — consider wrapping with
 * Collections.unmodifiableList if defensive exposure is a concern.
 * @return List of reader IDs backing this data source.
 */
public List<SAMReaderID> getReaderIDs() {
return readerIDs;
}
/**
 * Gets the SAM file associated with a given reader ID. SAMReaderID currently
 * supports only file-based readers, so this lookup is a direct field access.
 * @param id The reader for which to retrieve the source file.
 * @return The file actually associated with the id.
 */
public File getSAMFile(SAMReaderID id) {
return id.samFile;
}
/** Returns true if there are read group duplicates within the merged headers. */
public abstract boolean hasReadGroupCollisions();
/** Returns the read group id that should be used for the input read and RG id. */
public abstract String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId);
public abstract String getReadGroupId(final SAMReaderID reader, final String originalReadGroupId);
/**
*

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import java.io.File;
/**
* Uniquely identifies a SAM file reader.
*
* @author mhanna
* @version 0.1
*/
public class SAMReaderID {
/**
* The SAM file at the heart of this reader. SAMReaderID
* currently supports only file-based readers.
*/
protected final File samFile;
/**
* Creates an identifier for a SAM file based on read.
* @param samFile The source file for SAM data.
*/
protected SAMReaderID(File samFile) {
this.samFile = samFile;
}
/**
* Compare two IDs to see whether they're equal.
* @param other The other identifier.
* @return True iff the two readers point to the same file.
*/
public boolean equals(Object other) {
if(other == null) return false;
if(!(other instanceof SAMReaderID)) return false;
SAMReaderID otherID = (SAMReaderID)other;
return this.samFile.equals(otherID.samFile);
}
/**
* Generate a hash code for this object.
* @return A hash code, based solely on the file name at this point.
*/
public int hashCode() {
return samFile.hashCode();
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -127,9 +128,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
SAMFileWriterFactory factory = new SAMFileWriterFactory();
if ( NWAY_OUTPUT ) {
Map<File, SAMFileReader> readerMap = getToolkit().getFileToReaderMapping();
for ( File file : readerMap.keySet() ) {
SAMFileHeader header = readerMap.get(file).getFileHeader();
List<SAMReaderID> ids = getToolkit().getDataSource().getReaderIDs();
for ( SAMReaderID id: ids ) {
File file = getToolkit().getDataSource().getSAMFile(id);
SAMFileHeader header = getToolkit().getSAMFileHeader(id);
if ( SORTING_STRATEGY == RealignerSortingStrategy.NO_SORT )
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
String newFileName = file.getName().substring(0, file.getName().length()-3) + outputSuffix + ".bam";