NanoScheduler optimization for TraverseReadsNano
-- Pre-read MapData into a list, which is actually faster than dealing with future lock contention issues with lots of map threads -- Increase the ReadShard default size to 100K reads by default
This commit is contained in:
parent
f849910c4e
commit
b92f563d06
|
|
@ -253,9 +253,10 @@ public class SAMDataSource {
|
|||
if(readBufferSize != null)
|
||||
ReadShard.setReadBufferSize(readBufferSize); // TODO: use of non-final static variable here is just awful, especially for parallel tests
|
||||
else {
|
||||
// Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively
|
||||
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
|
||||
ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
|
||||
// Choose a sensible default for the read buffer size.
|
||||
// Previously we we're picked 100000 reads per BAM per shard with a max cap of 250K reads in memory at once.
|
||||
// Now we are simply setting it to 100K reads
|
||||
ReadShard.setReadBufferSize(100000);
|
||||
}
|
||||
|
||||
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* A nano-scheduling version of TraverseReads.
|
||||
|
|
@ -53,6 +54,7 @@ import java.util.Iterator;
|
|||
*/
|
||||
public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private final static boolean PRE_READ_ALL_MAP_DATA = true;
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
|
||||
private static final boolean DEBUG = false;
|
||||
final NanoScheduler<MapData, MapResult, T> nanoScheduler;
|
||||
|
|
@ -111,7 +113,19 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
* should execute
|
||||
*/
|
||||
private Iterator<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
|
||||
return new Iterator<MapData>() {
|
||||
final Iterator<MapData> it = makeDataIterator(dataProvider);
|
||||
if ( PRE_READ_ALL_MAP_DATA ) {
|
||||
final LinkedList<MapData> l = new LinkedList<MapData>();
|
||||
while ( it.hasNext() ) l.add(it.next());
|
||||
return l.iterator();
|
||||
} else {
|
||||
return it;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Iterator<MapData> makeDataIterator(final ReadShardDataProvider dataProvider) {
|
||||
return new Iterator<MapData> () {
|
||||
final ReadView reads = new ReadView(dataProvider);
|
||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
|
|
|||
Loading…
Reference in New Issue