diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index f5a6cd29f..7496f18fa 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -371,6 +371,24 @@ public class GenomeAnalysisEngine { return locs; } + /** + * Gets a unique identifier for the reader sourcing this read. + * @param read Read to examine. + * @return A unique identifier for the source file of this read, obtained from the data source. The block- and index-driven data sources throw a StingException if not found. + */ + public SAMReaderID getReaderIDForRead(final SAMRecord read) { + return getDataSource().getReaderID(read); + } + + /** + * Gets the source file for the reader with the given id. + * @param id Unique identifier determining which input file to use. + * @return The source file associated with the given reader id. + */ + public File getSourceFileForReaderID(final SAMReaderID id) { + return getDataSource().getSAMFile(id); + } + /** * Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying * individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index abe10b0c5..1a712cbc8 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -175,6 +175,15 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { iterator.close(); } + /** + * Retrieves the id of the reader which built the given read. + * @param read The read to test. + * @return ID of the reader. 
+ */ + public SAMReaderID getReaderID(SAMRecord read) { + return resourcePool.getReaderID(read.getReader()); + } + /** * Adds this read to the given shard. * @param shard The shard to which to add the read. @@ -357,6 +366,20 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { availableResources.add(readers); } + /** + * Gets the reader id for the given reader. + * @param reader Reader for which to determine the id. + * @return id of the given reader; a StingException is thrown if no pooled resource knows the reader. + */ + protected synchronized SAMReaderID getReaderID(SAMFileReader reader) { + for(SAMReaders readers: allResources) { + SAMReaderID id = readers.getReaderID(reader); + if(id != null) + return id; + } + throw new StingException("No such reader id is available"); + } + private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new StingException("Cannot create a new resource pool. All resources are in use."); @@ -413,6 +436,20 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { return readers.get(id); } + /** + * Searches for the reader id of this reader; readers are matched by reference identity. + * @param reader Reader for which to search. + * @return The id associated with the given reader, or null if the reader is not present in this collection. + */ + protected SAMReaderID getReaderID(SAMFileReader reader) { + for(Map.Entry<SAMReaderID,SAMFileReader> entry: readers.entrySet()) { + if(reader == entry.getValue()) + return entry.getKey(); + } + // Not found? return null. + return null; + } + /** * Returns an iterator over all readers in this structure. * @return An iterator over readers. 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java index 322d83f6e..d35e9fa5e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java @@ -120,6 +120,17 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader(); } + /** + * Retrieves the id of the reader which built the given read. + * @param read The read to test. + * @return ID of the reader; a StingException is thrown if the read's reader is not in the pool's reader-to-ID map. + */ + public SAMReaderID getReaderID(SAMRecord read) { + if(resourcePool.readerToIDMap.containsKey(read.getReader())) + return resourcePool.readerToIDMap.get(read.getReader()); + throw new StingException("Unable to find reader id for record."); + } + /** * Returns Reads data structure containing information about the reads data sources placed in this pool as well as * information about how they are downsampled, sorted, and filtered diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java index 66a665171..38899a7c7 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReadStreamResource.java @@ -79,6 +79,11 @@ class ReadStreamResource { */ private Map fileToReaderMap = null; + /** + * A mapping from reader back to the ID uniquely identifying this input file. 
+ */ + private Map<SAMFileReader,SAMReaderID> readerToIDMap = null; + public ReadStreamResource( Reads sourceInfo ) { SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate); @@ -146,6 +151,10 @@ class ReadStreamResource { return fileToReaderMap; } + public Map<SAMFileReader,SAMReaderID> getReaderToIDMapping() { + return readerToIDMap; + } + /** * A private function that, given the internal file list, generates a merging construct for * all available files. @@ -160,9 +169,11 @@ class ReadStreamResource { // right now this is pretty damn heavy, it copies the file list into a reader list every time List lst = new ArrayList(); fileToReaderMap = new HashMap(); + readerToIDMap = new HashMap<SAMFileReader,SAMReaderID>(); for (File f : reads.getReadsFiles()) { SAMFileReader reader = new SAMFileReader(f, eagerDecode); fileToReaderMap.put(f, reader); + readerToIDMap.put(reader,new SAMReaderID(f)); reader.setValidationStringency(reads.getValidationStringency()); final SAMFileHeader header = reader.getFileHeader(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index f60e52a03..32775dbc6 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; import net.sf.picard.filter.FilteringIterator; import net.sf.picard.filter.SamRecordFilter; @@ -122,6 +123,13 @@ public abstract class SAMDataSource implements SimpleDataSource { */ public abstract SAMFileHeader getHeader(SAMReaderID reader); + /** + * Retrieves the id of the reader which built the given read. + * @param read The read to test. + * @return ID of the reader that built the read. 
+ */ + public abstract SAMReaderID getReaderID(SAMRecord read); + /** * Returns Reads data structure containing information about the reads data sources placed in this pool as well as * information about how they are downsampled, sorted, and filtered @@ -143,7 +151,7 @@ public abstract class SAMDataSource implements SimpleDataSource { */ public File getSAMFile(SAMReaderID id) { return id.samFile; - } + } /** Returns true if there are read group duplicates within the merged headers. */ public abstract boolean hasReadGroupCollisions(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java index 5d6e73775..9a1504ecb 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMResourcePool.java @@ -54,6 +54,7 @@ class SAMResourcePool extends ResourcePool protected Reads reads; protected SamFileHeaderMerger headerMerger; protected Map fileToReaderMap; + protected Map<SAMFileReader,SAMReaderID> readerToIDMap; /** * Do all the constituent BAM files have indices? We support some very limited @@ -76,6 +77,7 @@ class SAMResourcePool extends ResourcePool this.headerMerger = streamResource.getHeaderMerger(); this.hasIndex = streamResource.hasIndex(); this.fileToReaderMap = streamResource.getFileToReaderMapping(); + this.readerToIDMap = streamResource.getReaderToIDMapping(); // Add this resource to the pool. this.addNewResource(streamResource);