Fixed one low-memory sharder performance culprit: regions with no BAM data

whatsoever were misusing the Picard MergingIterator, triggering a re-traversal 
through the entire contig.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5376 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-03-04 21:26:22 +00:00
parent 337b54136f
commit 10516f5de4
1 changed file with 8 additions and 3 deletions

View File

@ -103,15 +103,16 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
for(SAMReaderID reader: dataSource.getReaderIDs()) {
GATKBAMIndex index = (GATKBAMIndex)dataSource.getIndex(reader);
BinTree binTree = getNextOverlappingBinTree(reader,(GATKBAMIndex)dataSource.getIndex(reader),currentLocus);
if(binTree != null) {
coveredRegionStart = Math.max(coveredRegionStart,binTree.getStart());
coveredRegionStop = Math.min(coveredRegionStop,binTree.getStop());
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus);
nextFilePointer.addFileSpans(reader,fileSpan);
}
GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus);
nextFilePointer.addFileSpans(reader,fileSpan);
}
// Early exit if no bins were found.
@ -195,6 +196,10 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
* @return File span mapping to given region.
*/
private GATKBAMFileSpan generateFileSpan(final SAMReaderID reader, final GATKBAMIndex index, final BinTree binTree, final GenomeLoc initialRegion) {
// Empty bin trees mean empty file spans.
if(binTree == null)
return new GATKBAMFileSpan();
List<GATKChunk> chunks = new ArrayList<GATKChunk>(binTree.size());
for(GATKBin bin: binTree.getBins()) {
if(bin == null)