Optimizations for parallel read walkers

-- TraversalReadsNano only creates the NanoScheduler once, and shuts it down onTraversalDone
-- Nicer debugging output in NanoScheduler
-- ReadShard has a getBufferSize() method now
This commit is contained in:
Mark DePristo 2012-08-25 17:21:02 -04:00
parent 5066b14335
commit fde9824765
3 changed files with 22 additions and 8 deletions

View File

@ -58,6 +58,15 @@ public class ReadShard extends Shard {
MAX_READS = bufferSize;
}
/**
* What read buffer size are we using?
*
* @return
*/
public static int getReadBufferSize() {
return MAX_READS;
}
/**
* Returns true if this shard is meant to buffer reads, rather
* than just holding pointers to their locations.

View File

@ -51,12 +51,12 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
/** our log, which we want to capture anything from this class */
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
private static final boolean DEBUG = false;
final int bufferSize = ReadShard.MAX_READS;
final int mapGroupSize = bufferSize / 10 + 1;
final int nThreads;
final NanoScheduler<SAMRecord, M, T> nanoScheduler;
public TraverseReadsNano(int nThreads) {
this.nThreads = nThreads;
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
final int mapGroupSize = bufferSize / 10 + 1;
nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
}
@Override
@ -87,18 +87,23 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
final ReadReferenceView reference = new NotImplementedReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
final NanoScheduler<SAMRecord, M, T> nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
nanoScheduler.setDebug(DEBUG);
final TraverseReadsMap myMap = new TraverseReadsMap(reads, reference, rodView, walker);
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
T result = nanoScheduler.execute(reads.iterator().iterator(), myMap, sum, myReduce);
nanoScheduler.shutdown();
// TODO -- how do we print progress?
//printProgress(dataProvider.getShard(), ???);
return result;
}
@Override
public void printOnTraversalDone() {
nanoScheduler.shutdown();
super.printOnTraversalDone(); //To change body of overridden methods use File | Settings | File Templates.
}
private static class NotImplementedReadReferenceView extends ReadReferenceView {
private NotImplementedReadReferenceView(ShardDataProvider provider) {
super(provider);

View File

@ -136,7 +136,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
private void debugPrint(final String format, Object ... args) {
if ( isDebug() )
logger.info(String.format(format, args));
logger.info("Thread " + Thread.currentThread().getId() + ":" + String.format(format, args));
}
@ -205,7 +205,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
final MapFunction<InputType, MapType> map,
final ReduceType initialValue,
final ReduceFunction<MapType, ReduceType> reduce) {
debugPrint("Executing nanoScheduler with initial reduce value " + initialValue);
debugPrint("Executing nanoScheduler");
ReduceType sum = initialValue;
while ( inputReader.hasNext() ) {
try {