New fork of SamFileHeaderMerger should be cached at the thread level to enable fast (and valid) thread lookups.
This commit is contained in:
parent
bc842ab3a5
commit
1ead00cac5
|
|
@ -100,11 +100,6 @@ public class SAMDataSource {
|
||||||
*/
|
*/
|
||||||
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
|
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
|
||||||
|
|
||||||
/**
|
|
||||||
* Cached representation of the merged header used to generate a merging iterator.
|
|
||||||
*/
|
|
||||||
private final SamFileHeaderMerger headerMerger;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The merged header.
|
* The merged header.
|
||||||
*/
|
*/
|
||||||
|
|
@ -300,9 +295,8 @@ public class SAMDataSource {
|
||||||
|
|
||||||
initializeReaderPositions(readers);
|
initializeReaderPositions(readers);
|
||||||
|
|
||||||
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
|
mergedHeader = readers.getMergedHeader();
|
||||||
mergedHeader = headerMerger.getMergedHeader();
|
hasReadGroupCollisions = readers.hasReadGroupCollisions();
|
||||||
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
|
|
||||||
|
|
||||||
readProperties = new ReadProperties(
|
readProperties = new ReadProperties(
|
||||||
samFiles,
|
samFiles,
|
||||||
|
|
@ -327,9 +321,9 @@ public class SAMDataSource {
|
||||||
|
|
||||||
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
||||||
for(SAMReadGroupRecord readGroup: readGroups) {
|
for(SAMReadGroupRecord readGroup: readGroups) {
|
||||||
if(headerMerger.hasReadGroupCollisions()) {
|
if(hasReadGroupCollisions) {
|
||||||
mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId()));
|
||||||
mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId());
|
mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId());
|
||||||
} else {
|
} else {
|
||||||
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||||
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||||
|
|
@ -562,7 +556,7 @@ public class SAMDataSource {
|
||||||
*/
|
*/
|
||||||
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
|
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
|
||||||
// Set up merging to dynamically merge together multiple BAMs.
|
// Set up merging to dynamically merge together multiple BAMs.
|
||||||
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
|
MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
|
||||||
|
|
||||||
for(SAMReaderID id: getReaderIDs()) {
|
for(SAMReaderID id: getReaderIDs()) {
|
||||||
CloseableIterator<SAMRecord> iterator = null;
|
CloseableIterator<SAMRecord> iterator = null;
|
||||||
|
|
@ -707,6 +701,11 @@ public class SAMDataSource {
|
||||||
* A collection of readers derived from a reads metadata structure.
|
* A collection of readers derived from a reads metadata structure.
|
||||||
*/
|
*/
|
||||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
private class SAMReaders implements Iterable<SAMFileReader> {
|
||||||
|
/**
|
||||||
|
* Cached representation of the merged header used to generate a merging iterator.
|
||||||
|
*/
|
||||||
|
private final SamFileHeaderMerger headerMerger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal storage for a map of id -> reader.
|
* Internal storage for a map of id -> reader.
|
||||||
*/
|
*/
|
||||||
|
|
@ -798,6 +797,11 @@ public class SAMDataSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||||
|
|
||||||
|
Collection<SAMFileHeader> headers = new LinkedList<SAMFileHeader>();
|
||||||
|
for(SAMFileReader reader: readers.values())
|
||||||
|
headers.add(reader.getFileHeader());
|
||||||
|
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
|
||||||
}
|
}
|
||||||
|
|
||||||
final private void printReaderPerformance(final int nExecutedTotal,
|
final private void printReaderPerformance(final int nExecutedTotal,
|
||||||
|
|
@ -814,7 +818,37 @@ public class SAMDataSource {
|
||||||
nExecutedInTick, tickDurationInSec,
|
nExecutedInTick, tickDurationInSec,
|
||||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the header derived from the merging of these BAM files.
|
||||||
|
* @return the merged header.
|
||||||
|
*/
|
||||||
|
public SAMFileHeader getMergedHeader() {
|
||||||
|
return headerMerger.getMergedHeader();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do multiple read groups collide in this dataset?
|
||||||
|
* @return True if multiple read groups collide; false otherwis.
|
||||||
|
*/
|
||||||
|
public boolean hasReadGroupCollisions() {
|
||||||
|
return headerMerger.hasReadGroupCollisions();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the newly mapped read group ID for the given read group.
|
||||||
|
* @param readerID Reader for which to discern the transformed ID.
|
||||||
|
* @param originalReadGroupID Original read group.
|
||||||
|
* @return Remapped read group.
|
||||||
|
*/
|
||||||
|
public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) {
|
||||||
|
SAMFileHeader header = readers.get(readerID).getFileHeader();
|
||||||
|
return headerMerger.getReadGroupId(header,originalReadGroupID);
|
||||||
|
}
|
||||||
|
|
||||||
|
public MergingSamRecordIterator createMergingIterator() {
|
||||||
|
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -866,25 +900,6 @@ public class SAMDataSource {
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return readers.isEmpty();
|
return readers.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets all the actual readers out of this data structure.
|
|
||||||
* @return A collection of the readers.
|
|
||||||
*/
|
|
||||||
public Collection<SAMFileReader> values() {
|
|
||||||
return readers.values();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets all the actual readers out of this data structure.
|
|
||||||
* @return A collection of the readers.
|
|
||||||
*/
|
|
||||||
public Collection<SAMFileHeader> headers() {
|
|
||||||
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
|
|
||||||
for (SAMFileReader reader : values())
|
|
||||||
headers.add(reader.getFileHeader());
|
|
||||||
return headers;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class ReaderInitializer implements Callable<ReaderInitializer> {
|
class ReaderInitializer implements Callable<ReaderInitializer> {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue