Oops! Introduced a performance bug in read interval sharding when the new sharding system is available. Track more state to avoid this problem in the future.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2987 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-03-11 23:19:42 +00:00
parent 740238b4fa
commit 5e8654fcdc
2 changed files with 24 additions and 7 deletions

View File

@ -35,7 +35,17 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
private final List<FilePointer> filePointers = new ArrayList<FilePointer>(); private final List<FilePointer> filePointers = new ArrayList<FilePointer>();
/** /**
* Position of the last shard in the file. * Iterator over the list of file pointers.
*/
private final Iterator<FilePointer> filePointerIterator;
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
/**
* Ending position of the last shard in the file.
*/ */
private Map<SAMReaderID,Chunk> position; private Map<SAMReaderID,Chunk> position;
@ -51,6 +61,11 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
this.position = this.dataSource.getCurrentPosition(); this.position = this.dataSource.getCurrentPosition();
if(locations != null) if(locations != null)
filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList(),this.dataSource.getNumIndexLevels()-1)); filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList(),this.dataSource.getNumIndexLevels()-1));
filePointerIterator = filePointers.iterator();
if(filePointerIterator.hasNext())
currentFilePointer = filePointerIterator.next();
advance(); advance();
} }
@ -81,10 +96,10 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
SamRecordFilter filter = null; SamRecordFilter filter = null;
if(!filePointers.isEmpty()) { if(!filePointers.isEmpty()) {
for(FilePointer filePointer: filePointers) { Map<SAMReaderID,List<Chunk>> selectedReaders = new HashMap<SAMReaderID,List<Chunk>>();
shardPosition = dataSource.getFilePointersBounding(filePointer.bin); while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = dataSource.getFilePointersBounding(currentFilePointer.bin);
Map<SAMReaderID,List<Chunk>> selectedReaders = new HashMap<SAMReaderID,List<Chunk>>();
for(SAMReaderID id: shardPosition.keySet()) { for(SAMReaderID id: shardPosition.keySet()) {
List<Chunk> chunks = shardPosition.get(id); List<Chunk> chunks = shardPosition.get(id);
List<Chunk> selectedChunks = new ArrayList<Chunk>(); List<Chunk> selectedChunks = new ArrayList<Chunk>();
@ -98,17 +113,19 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
if(selectedChunks.size() > 0) if(selectedChunks.size() > 0)
selectedReaders.put(id,selectedChunks); selectedReaders.put(id,selectedChunks);
} }
if(selectedReaders.size() > 0) {
filter = new ReadOverlapFilter(filePointer.locations);
if(selectedReaders.size() > 0) {
BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),selectedReaders,filter,Shard.ShardType.READ); BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),selectedReaders,filter,Shard.ShardType.READ);
dataSource.fillShard(shard); dataSource.fillShard(shard);
if(!shard.isBufferEmpty()) { if(!shard.isBufferEmpty()) {
filter = new ReadOverlapFilter(currentFilePointer.locations);
nextShard = shard; nextShard = shard;
break; break;
} }
} }
currentFilePointer = filePointerIterator.hasNext() ? filePointerIterator.next() : null;
} }
} }
else { else {

View File

@ -11,7 +11,7 @@ import java.util.Iterator;
*/ */
public class ReadDelimitedReadShardStrategy extends ReadShardStrategy { public class ReadDelimitedReadShardStrategy extends ReadShardStrategy {
// our read bucket size, default // our read bucket size, default
protected long readCount = 100000L; protected long readCount = 1000L;
// our hasnext flag // our hasnext flag
boolean hasNext = true; boolean hasNext = true;