For Kristian: functions which, given a read, can uniquely identify the BAM file storing that read.

Introducing this into the pile of code which peeks under the covers of the SAMDataSource in the hopes
that this function can help to replace the others and provide a single path for crosstalk.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3103 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-31 20:46:44 +00:00
parent ac9c335cd2
commit 4fcee248f9
6 changed files with 88 additions and 1 deletions

View File

@ -371,6 +371,24 @@ public class GenomeAnalysisEngine {
return locs;
}
/**
 * Looks up the identifier of the reader that produced the given read.
 * The lookup itself is delegated to the engine's data source.
 * @param read Read whose originating reader should be identified.
 * @return The identifier the data source reports for the reader behind this read.
 *         Documented contract: an exception is raised if no such reader is found.
 */
public SAMReaderID getReaderIDForRead(final SAMRecord read) {
    final SAMReaderID readerID = getDataSource().getReaderID(read);
    return readerID;
}
/**
 * Resolves a reader identifier to the input file it stands for.
 * The resolution is delegated to the engine's data source.
 * @param id Unique identifier determining which input file to use.
 * @return The file that the data source associates with the given identifier.
 */
public File getSourceFileForReaderID(final SAMReaderID id) {
    final File sourceFile = getDataSource().getSAMFile(id);
    return sourceFile;
}
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list

View File

@ -175,6 +175,15 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
iterator.close();
}
/**
 * Determines which reader produced the given read and returns that reader's id.
 * The read is asked for its reader, and the resource pool maps that reader to an id.
 * @param read The read to test.
 * @return ID of the reader that the resource pool associates with this read's reader.
 */
public SAMReaderID getReaderID(SAMRecord read) {
    final SAMReaderID readerID = resourcePool.getReaderID(read.getReader());
    return readerID;
}
/**
* Adds this read to the given shard.
* @param shard The shard to which to add the read.
@ -357,6 +366,20 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
availableResources.add(readers);
}
/**
 * Finds the id registered for the given reader by consulting every reader
 * collection held in this pool, in iteration order, and stopping at the first hit.
 * @param reader Reader for which to determine the id.
 * @return id of the given reader; never null.
 * @throws StingException if none of the pool's reader collections knows this reader.
 */
protected synchronized SAMReaderID getReaderID(SAMFileReader reader) {
    SAMReaderID match = null;
    for(SAMReaders candidate: allResources) {
        match = candidate.getReaderID(reader);
        if(match != null)
            break;
    }
    // A miss in every collection is reported as an error rather than as a null id.
    if(match == null)
        throw new StingException("No such reader id is available");
    return match;
}
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new StingException("Cannot create a new resource pool. All resources are in use.");
@ -413,6 +436,20 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
return readers.get(id);
}
/**
 * Performs a reverse lookup from a reader to the id under which it is stored.
 * Readers are compared by reference identity (==), not by equals().
 * @param reader Reader for which to search.
 * @return The id associated with the given reader, or null if the reader is not present in this collection.
 */
protected SAMReaderID getReaderID(SAMFileReader reader) {
    SAMReaderID found = null;
    for(Map.Entry<SAMReaderID,SAMFileReader> candidate: readers.entrySet()) {
        if(candidate.getValue() == reader) {
            found = candidate.getKey();
            break;
        }
    }
    // Still null here when no entry referenced the given reader.
    return found;
}
/**
* Returns an iterator over all readers in this structure.
* @return An iterator over readers.

View File

@ -120,6 +120,17 @@ public class IndexDrivenSAMDataSource extends SAMDataSource {
return resourcePool.fileToReaderMap.get(id.samFile).getFileHeader();
}
/**
 * Retrieves the id of the reader which built the given read.
 * @param read The read to test.
 * @return ID of the reader that produced the read; never null.
 * @throws StingException if the read's reader has no entry in the resource pool's reader-to-id map.
 */
public SAMReaderID getReaderID(SAMRecord read) {
    // Ask the record for its reader once and probe the map once,
    // rather than a containsKey() followed by a second get() on the same key.
    final SAMReaderID id = resourcePool.readerToIDMap.get(read.getReader());
    if(id == null)
        throw new StingException("Unable to find reader id for record.");
    return id;
}
/**
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
* information about how they are downsampled, sorted, and filtered

View File

@ -79,6 +79,11 @@ class ReadStreamResource {
*/
private Map<File, SAMFileReader> fileToReaderMap = null;
/**
 * A mapping from reader back to the ID uniquely identifying this input file.
 * Starts out null; it is allocated and filled, one entry per reads file, in the same
 * loop that fills fileToReaderMap, and is handed out as-is by getReaderToIDMapping().
 */
private Map<SAMFileReader, SAMReaderID> readerToIDMap = null;
public ReadStreamResource( Reads sourceInfo ) {
SamFileHeaderMerger headerMerger = createHeaderMerger(sourceInfo, SAMFileHeader.SortOrder.coordinate);
@ -146,6 +151,10 @@ class ReadStreamResource {
return fileToReaderMap;
}
/**
 * Exposes the reverse mapping from each reader to the ID of its input file.
 * The internal map itself is returned, not a copy.
 * @return The reader-to-ID map held by this resource.
 */
public Map<SAMFileReader,SAMReaderID> getReaderToIDMapping() {
    return this.readerToIDMap;
}
/**
* A private function that, given the internal file list, generates a merging construct for
* all available files.
@ -160,9 +169,11 @@ class ReadStreamResource {
// right now this is pretty damn heavy, it copies the file list into a reader list every time
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
fileToReaderMap = new HashMap<File, SAMFileReader>();
readerToIDMap = new HashMap<SAMFileReader,SAMReaderID>();
for (File f : reads.getReadsFiles()) {
SAMFileReader reader = new SAMFileReader(f, eagerDecode);
fileToReaderMap.put(f, reader);
readerToIDMap.put(reader,new SAMReaderID(f));
reader.setValidationStringency(reads.getValidationStringency());
final SAMFileHeader header = reader.getFileHeader();

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
@ -122,6 +123,13 @@ public abstract class SAMDataSource implements SimpleDataSource {
*/
public abstract SAMFileHeader getHeader(SAMReaderID reader);
/**
 * Retrieves the id of the reader which built the given read.
 * The block-driven and index-driven data sources both throw a StingException,
 * rather than returning null, when the read's reader is unknown to them.
 * @param read The read to test.
 * @return ID of the reader.
 */
public abstract SAMReaderID getReaderID(SAMRecord read);
/**
* Returns Reads data structure containing information about the reads data sources placed in this pool as well as
* information about how they are downsampled, sorted, and filtered
@ -143,7 +151,7 @@ public abstract class SAMDataSource implements SimpleDataSource {
*/
public File getSAMFile(SAMReaderID id) {
// The id carries its file directly, so no lookup against the readers is needed.
return id.samFile;
}
}
/**
 * Reports whether read group duplicates exist within the merged headers.
 * @return true if there are read group duplicates within the merged headers.
 */
public abstract boolean hasReadGroupCollisions();

View File

@ -54,6 +54,7 @@ class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator>
/** Reads configuration for this pool. NOTE(review): its assignment is not visible in this view; confirm where it is set. */
protected Reads reads;
/** Header merger, assigned from a ReadStreamResource via getHeaderMerger(). */
protected SamFileHeaderMerger headerMerger;
/** File-to-reader mapping, assigned from the same stream resource via getFileToReaderMapping(). */
protected Map<File, SAMFileReader> fileToReaderMap;
/** Reader-to-ID mapping, assigned from the same stream resource via getReaderToIDMapping(). */
protected Map<SAMFileReader, SAMReaderID> readerToIDMap;
/**
* Do all the constituent BAM files have indices? We support some very limited
@ -76,6 +77,7 @@ class SAMResourcePool extends ResourcePool<ReadStreamResource, StingSAMIterator>
this.headerMerger = streamResource.getHeaderMerger();
this.hasIndex = streamResource.hasIndex();
this.fileToReaderMap = streamResource.getFileToReaderMapping();
this.readerToIDMap = streamResource.getReaderToIDMapping();
// Add this resource to the pool.
this.addNewResource(streamResource);