New fork of SamFileHeaderMerger should be cached at the thread level to enable fast (and valid) thread lookups.

This commit is contained in:
Matt Hanna 2011-12-18 19:04:26 -05:00
parent bc842ab3a5
commit 1ead00cac5
1 changed files with 46 additions and 31 deletions

View File

@ -100,11 +100,6 @@ public class SAMDataSource {
*/
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/**
* The merged header.
*/
@ -300,9 +295,8 @@ public class SAMDataSource {
initializeReaderPositions(readers);
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
mergedHeader = headerMerger.getMergedHeader();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
mergedHeader = readers.getMergedHeader();
hasReadGroupCollisions = readers.hasReadGroupCollisions();
readProperties = new ReadProperties(
samFiles,
@ -327,9 +321,9 @@ public class SAMDataSource {
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups) {
if(headerMerger.hasReadGroupCollisions()) {
mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId());
if(hasReadGroupCollisions) {
mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId()));
mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId());
} else {
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
@ -562,7 +556,7 @@ public class SAMDataSource {
*/
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
// Set up merging to dynamically merge together multiple BAMs.
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
for(SAMReaderID id: getReaderIDs()) {
CloseableIterator<SAMRecord> iterator = null;
@ -707,6 +701,11 @@ public class SAMDataSource {
* A collection of readers derived from a reads metadata structure.
*/
private class SAMReaders implements Iterable<SAMFileReader> {
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/**
* Internal storage for a map of id -> reader.
*/
@ -798,6 +797,11 @@ public class SAMDataSource {
}
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
Collection<SAMFileHeader> headers = new LinkedList<SAMFileHeader>();
for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
}
final private void printReaderPerformance(final int nExecutedTotal,
@ -814,7 +818,37 @@ public class SAMDataSource {
nExecutedInTick, tickDurationInSec,
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
nRemaining, estTimeToComplete, estTimeToComplete / 60));
}
/**
* Return the header derived from the merging of these BAM files.
* @return the merged header.
*/
public SAMFileHeader getMergedHeader() {
return headerMerger.getMergedHeader();
}
/**
* Do multiple read groups collide in this dataset?
* @return True if multiple read groups collide; false otherwis.
*/
public boolean hasReadGroupCollisions() {
return headerMerger.hasReadGroupCollisions();
}
/**
* Get the newly mapped read group ID for the given read group.
* @param readerID Reader for which to discern the transformed ID.
* @param originalReadGroupID Original read group.
* @return Remapped read group.
*/
public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) {
SAMFileHeader header = readers.get(readerID).getFileHeader();
return headerMerger.getReadGroupId(header,originalReadGroupID);
}
public MergingSamRecordIterator createMergingIterator() {
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
}
/**
@ -866,25 +900,6 @@ public class SAMDataSource {
public boolean isEmpty() {
return readers.isEmpty();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileReader> values() {
return readers.values();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileHeader> headers() {
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (SAMFileReader reader : values())
headers.add(reader.getFileHeader());
return headers;
}
}
class ReaderInitializer implements Callable<ReaderInitializer> {