Fixed one low-memory sharder performance culprit: regions with no BAM data whatsoever were misusing the Picard MergingIterator, triggering a re-traversal through the entire contig.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5376 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-03-04 21:26:22 +00:00
parent 337b54136f
commit 10516f5de4
1 changed file with 8 additions and 3 deletions

View File

@@ -103,16 +103,17 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
for(SAMReaderID reader: dataSource.getReaderIDs()) { for(SAMReaderID reader: dataSource.getReaderIDs()) {
GATKBAMIndex index = (GATKBAMIndex)dataSource.getIndex(reader); GATKBAMIndex index = (GATKBAMIndex)dataSource.getIndex(reader);
BinTree binTree = getNextOverlappingBinTree(reader,(GATKBAMIndex)dataSource.getIndex(reader),currentLocus); BinTree binTree = getNextOverlappingBinTree(reader,(GATKBAMIndex)dataSource.getIndex(reader),currentLocus);
if(binTree != null) { if(binTree != null) {
coveredRegionStart = Math.max(coveredRegionStart,binTree.getStart()); coveredRegionStart = Math.max(coveredRegionStart,binTree.getStart());
coveredRegionStop = Math.min(coveredRegionStop,binTree.getStop()); coveredRegionStop = Math.min(coveredRegionStop,binTree.getStop());
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop); coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
}
GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus); GATKBAMFileSpan fileSpan = generateFileSpan(reader,index,binTree,currentLocus);
nextFilePointer.addFileSpans(reader,fileSpan); nextFilePointer.addFileSpans(reader,fileSpan);
} }
}
// Early exit if no bins were found. // Early exit if no bins were found.
if(coveredRegion == null) { if(coveredRegion == null) {
@@ -195,6 +196,10 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
* @return File span mapping to given region. * @return File span mapping to given region.
*/ */
private GATKBAMFileSpan generateFileSpan(final SAMReaderID reader, final GATKBAMIndex index, final BinTree binTree, final GenomeLoc initialRegion) { private GATKBAMFileSpan generateFileSpan(final SAMReaderID reader, final GATKBAMIndex index, final BinTree binTree, final GenomeLoc initialRegion) {
// Empty bin trees mean empty file spans.
if(binTree == null)
return new GATKBAMFileSpan();
List<GATKChunk> chunks = new ArrayList<GATKChunk>(binTree.size()); List<GATKChunk> chunks = new ArrayList<GATKChunk>(binTree.size());
for(GATKBin bin: binTree.getBins()) { for(GATKBin bin: binTree.getBins()) {
if(bin == null) if(bin == null)