Initial NanoScheduler with input producer thread
This commit is contained in:
parent
03dd470ec1
commit
a997c99806
|
|
@ -5,6 +5,7 @@ import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
@ -74,7 +75,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
|
|
||||||
this.bufferSize = bufferSize;
|
this.bufferSize = bufferSize;
|
||||||
this.nThreads = nThreads;
|
this.nThreads = nThreads;
|
||||||
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
|
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads-1);
|
||||||
|
|
||||||
// start timing the time spent outside of the nanoScheduler
|
// start timing the time spent outside of the nanoScheduler
|
||||||
outsideSchedulerTimer.start();
|
outsideSchedulerTimer.start();
|
||||||
|
|
@ -232,12 +233,19 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
|
final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
|
||||||
debugPrint("Executing nanoScheduler");
|
debugPrint("Executing nanoScheduler");
|
||||||
ReduceType sum = initialValue;
|
ReduceType sum = initialValue;
|
||||||
while ( inputReader.hasNext() ) {
|
boolean done = false;
|
||||||
try {
|
|
||||||
// read in our input values
|
|
||||||
final List<InputType> inputs = readInputs(inputReader);
|
|
||||||
debugPrint("Enqueuing " + inputs.size() + " elements to map");
|
|
||||||
|
|
||||||
|
final BlockingQueue<InputDatum> inputQueue = new LinkedBlockingDeque<InputDatum>(bufferSize);
|
||||||
|
final InputProducer inputProducer = new InputProducer(inputReader, inputQueue);
|
||||||
|
inputProducer.start();
|
||||||
|
|
||||||
|
while ( ! done ) {
|
||||||
|
try {
|
||||||
|
final Pair<List<InputType>, Boolean> readResults = readInputs(inputQueue);
|
||||||
|
final List<InputType> inputs = readResults.getFirst();
|
||||||
|
done = readResults.getSecond();
|
||||||
|
|
||||||
|
if ( ! inputs.isEmpty() ) {
|
||||||
// send jobs for map
|
// send jobs for map
|
||||||
final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
|
final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
|
||||||
|
|
||||||
|
|
@ -246,6 +254,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
debugPrint(" Done with cycle of map/reduce");
|
debugPrint(" Done with cycle of map/reduce");
|
||||||
|
|
||||||
if ( progressFunction != null ) progressFunction.progress(inputs.get(inputs.size()-1));
|
if ( progressFunction != null ) progressFunction.progress(inputs.get(inputs.size()-1));
|
||||||
|
} else {
|
||||||
|
// we must be done
|
||||||
|
if ( ! done ) throw new IllegalStateException("Inputs empty but not done");
|
||||||
|
}
|
||||||
} catch (InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
throw new ReviewedStingException("got execution exception", ex);
|
throw new ReviewedStingException("got execution exception", ex);
|
||||||
} catch (ExecutionException ex) {
|
} catch (ExecutionException ex) {
|
||||||
|
|
@ -267,9 +279,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
for ( final Future<MapType> future : mapQueue ) {
|
for ( final Future<MapType> future : mapQueue ) {
|
||||||
final MapType value = future.get(); // block until we get the values for this task
|
final MapType value = future.get(); // block until we get the values for this task
|
||||||
|
|
||||||
if ( TIME_CALLS) reduceTimer.restart();
|
if ( TIME_CALLS ) reduceTimer.restart();
|
||||||
sum = reduce.apply(value, sum);
|
sum = reduce.apply(value, sum);
|
||||||
if ( TIME_CALLS) reduceTimer.stop();
|
if ( TIME_CALLS ) reduceTimer.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
|
|
@ -280,21 +292,68 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
*
|
*
|
||||||
* @return a queue of input read in, containing one or more values of InputType read in
|
* @return a queue of input read in, containing one or more values of InputType read in
|
||||||
*/
|
*/
|
||||||
@Requires("inputReader.hasNext()")
|
@Requires("inputReader != null")
|
||||||
@Ensures("!result.isEmpty()")
|
@Ensures("result != null")
|
||||||
private List<InputType> readInputs(final Iterator<InputType> inputReader) {
|
private Pair<List<InputType>, Boolean> readInputs(final BlockingQueue<InputDatum> inputReader) throws InterruptedException {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
final List<InputType> inputs = new LinkedList<InputType>();
|
final List<InputType> inputs = new LinkedList<InputType>();
|
||||||
|
boolean done = false;
|
||||||
|
|
||||||
if ( TIME_CALLS) inputTimer.restart();
|
while ( ! done && n < getBufferSize() ) {
|
||||||
while ( inputReader.hasNext() && n < getBufferSize() ) {
|
final InputDatum input = inputReader.take();
|
||||||
final InputType input = inputReader.next();
|
done = input.isLast();
|
||||||
inputs.add(input);
|
if ( ! done ) {
|
||||||
|
inputs.add(input.datum);
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
if ( TIME_CALLS) inputTimer.stop();
|
}
|
||||||
|
|
||||||
return inputs;
|
return new Pair<List<InputType>, Boolean>(inputs, done);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class InputProducer extends Thread {
|
||||||
|
final Iterator<InputType> inputReader;
|
||||||
|
final BlockingQueue<InputDatum> outputQueue;
|
||||||
|
|
||||||
|
public InputProducer(final Iterator<InputType> inputReader, final BlockingQueue<InputDatum> outputQueue) {
|
||||||
|
this.inputReader = inputReader;
|
||||||
|
this.outputQueue = outputQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
while ( inputReader.hasNext() ) {
|
||||||
|
if ( TIME_CALLS ) inputTimer.restart();
|
||||||
|
final InputType input = inputReader.next();
|
||||||
|
if ( TIME_CALLS ) inputTimer.stop();
|
||||||
|
outputQueue.put(new InputDatum(input));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the EOF object so we know we are done
|
||||||
|
outputQueue.put(new InputDatum());
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
throw new ReviewedStingException("got execution exception", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class InputDatum {
|
||||||
|
final boolean isLast;
|
||||||
|
final InputType datum;
|
||||||
|
|
||||||
|
private InputDatum(final InputType datum) {
|
||||||
|
isLast = false;
|
||||||
|
this.datum = datum;
|
||||||
|
}
|
||||||
|
|
||||||
|
private InputDatum() {
|
||||||
|
isLast = true;
|
||||||
|
this.datum = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLast() {
|
||||||
|
return isLast;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"map != null", "! inputs.isEmpty()"})
|
@Requires({"map != null", "! inputs.isEmpty()"})
|
||||||
|
|
@ -326,10 +385,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public MapType call() throws Exception {
|
@Override public MapType call() throws Exception {
|
||||||
if ( TIME_CALLS) mapTimer.restart();
|
if ( TIME_CALLS ) mapTimer.restart();
|
||||||
if ( debug ) debugPrint("\t\tmap " + input);
|
if ( debug ) debugPrint("\t\tmap " + input);
|
||||||
final MapType result = map.apply(input);
|
final MapType result = map.apply(input);
|
||||||
if ( TIME_CALLS) mapTimer.stop();
|
if ( TIME_CALLS ) mapTimer.stop();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
Assert.assertNotNull(sum);
|
Assert.assertNotNull(sum);
|
||||||
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
|
Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
|
||||||
|
|
||||||
Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected");
|
Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected. Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
|
||||||
nanoScheduler.shutdown();
|
nanoScheduler.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -168,6 +168,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
|
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
|
||||||
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
|
final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
|
||||||
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
|
new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
|
||||||
|
nanoScheduler.setDebug(true);
|
||||||
|
|
||||||
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
|
final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
|
||||||
System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);
|
System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue