Fixed one low-memory sharder performance culprit: regions with no BAM data
whatsoever were misusing the Picard MergingIterator, triggering a re-traversal through the entire contig.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5376 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
337b54136f
commit
10516f5de4
|
|
@@ -103,15 +103,16 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
|
||||
for(SAMReaderID reader: dataSource.getReaderIDs()) {
|
||||
GATKBAMIndex index = (GATKBAMIndex)dataSource.getIndex(reader);
|
||||
|
||||
BinTree binTree = getNextOverlappingBinTree(reader,(GATKBAMIndex)dataSource.getIndex(reader),currentLocus);
|
||||
if(binTree != null) {
|
||||
coveredRegionStart = Math.max(coveredRegionStart,binTree.getStart());
|
||||
coveredRegionStop = Math.min(coveredRegionStop,binTree.getStop());
|
||||
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
|
||||
|
||||
GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus);
|
||||
nextFilePointer.addFileSpans(reader,fileSpan);
|
||||
}
|
||||
|
||||
GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus);
|
||||
nextFilePointer.addFileSpans(reader,fileSpan);
|
||||
}
|
||||
|
||||
// Early exit if no bins were found.
|
||||
|
|
@@ -195,6 +196,10 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
* @return File span mapping to given region.
|
||||
*/
|
||||
private GATKBAMFileSpan generateFileSpan(final SAMReaderID reader, final GATKBAMIndex index, final BinTree binTree, final GenomeLoc initialRegion) {
|
||||
// Empty bin trees mean empty file spans.
|
||||
if(binTree == null)
|
||||
return new GATKBAMFileSpan();
|
||||
|
||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(binTree.size());
|
||||
for(GATKBin bin: binTree.getBins()) {
|
||||
if(bin == null)
|
||||
|
|
|
|||
Loading…
Reference in New Issue