Reorganized NanoScheduler so that main thread does the reduces
-- Enables us to run -nt 2 -nct 2 and get meaningful output -- Uses a sleep / poll mechanism. Not ideal -- will look into wait / notify instead.
This commit is contained in:
parent
747694f7c2
commit
7425ab9637
|
|
@ -48,7 +48,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final int bufferSize;
|
final int bufferSize;
|
||||||
final int nThreads;
|
final int nThreads;
|
||||||
final ExecutorService inputExecutor;
|
final ExecutorService inputExecutor;
|
||||||
final ExecutorService masterExecutor;
|
final ExecutorService errorWatchingExecutor;
|
||||||
final ExecutorService mapExecutor;
|
final ExecutorService mapExecutor;
|
||||||
final Semaphore runningMapJobSlots;
|
final Semaphore runningMapJobSlots;
|
||||||
final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker();
|
final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker();
|
||||||
|
|
@ -85,14 +85,14 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
this.nThreads = nThreads;
|
this.nThreads = nThreads;
|
||||||
|
|
||||||
if ( nThreads == 1 ) {
|
if ( nThreads == 1 ) {
|
||||||
this.mapExecutor = this.inputExecutor = this.masterExecutor = null;
|
this.mapExecutor = this.inputExecutor = this.errorWatchingExecutor = null;
|
||||||
runningMapJobSlots = null;
|
runningMapJobSlots = null;
|
||||||
} else {
|
} else {
|
||||||
this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d"));
|
this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d"));
|
||||||
runningMapJobSlots = new Semaphore(this.bufferSize);
|
runningMapJobSlots = new Semaphore(this.bufferSize);
|
||||||
|
|
||||||
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
||||||
this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
this.errorWatchingExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// start timing the time spent outside of the nanoScheduler
|
// start timing the time spent outside of the nanoScheduler
|
||||||
|
|
@ -131,7 +131,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
if ( nThreads > 1 ) {
|
if ( nThreads > 1 ) {
|
||||||
shutdownExecutor("inputExecutor", inputExecutor);
|
shutdownExecutor("inputExecutor", inputExecutor);
|
||||||
shutdownExecutor("mapExecutor", mapExecutor);
|
shutdownExecutor("mapExecutor", mapExecutor);
|
||||||
shutdownExecutor("masterExecutor", masterExecutor);
|
shutdownExecutor("errorWatchingExecutor", errorWatchingExecutor);
|
||||||
}
|
}
|
||||||
|
|
||||||
shutdown = true;
|
shutdown = true;
|
||||||
|
|
@ -313,66 +313,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final NSReduceFunction<MapType, ReduceType> reduce) {
|
final NSReduceFunction<MapType, ReduceType> reduce) {
|
||||||
debugPrint("Executing nanoScheduler");
|
debugPrint("Executing nanoScheduler");
|
||||||
|
|
||||||
// start up the master job
|
final ErrorWatcherThread errorWatcher = new ErrorWatcherThread();
|
||||||
final MasterJob masterJob = new MasterJob(inputReader, map, initialValue, reduce);
|
errorWatchingExecutor.submit(errorWatcher);
|
||||||
final Future<ReduceType> reduceResult = masterExecutor.submit(masterJob);
|
|
||||||
|
|
||||||
while ( true ) {
|
|
||||||
// check that no errors occurred while we were waiting
|
|
||||||
handleErrors();
|
|
||||||
|
|
||||||
try {
|
|
||||||
final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
|
|
||||||
|
|
||||||
// in case an error occurred in the reduce
|
|
||||||
handleErrors();
|
|
||||||
|
|
||||||
// return our final reduce result
|
|
||||||
return result;
|
|
||||||
} catch (final TimeoutException ex ) {
|
|
||||||
// a normal case -- we just aren't done
|
|
||||||
} catch (final InterruptedException ex) {
|
|
||||||
errorTracker.notifyOfError(ex);
|
|
||||||
// will handle the error in the next iteration of the while loop
|
|
||||||
} catch (final ExecutionException ex) {
|
|
||||||
errorTracker.notifyOfError(ex);
|
|
||||||
// will handle the error in the next iteration of the while loop
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void handleErrors() {
|
|
||||||
if ( errorTracker.hasAnErrorOccurred() ) {
|
|
||||||
masterExecutor.shutdownNow();
|
|
||||||
mapExecutor.shutdownNow();
|
|
||||||
inputExecutor.shutdownNow();
|
|
||||||
errorTracker.throwErrorIfPending();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* MasterJob has the task to enqueue Map jobs and wait for the final reduce
|
|
||||||
*
|
|
||||||
* It must be run in a separate thread in order to properly handle errors that may occur
|
|
||||||
* in the input, map, or reduce jobs without deadlocking.
|
|
||||||
*
|
|
||||||
* The result of this callable is the final reduce value for the input / map / reduce jobs
|
|
||||||
*/
|
|
||||||
private class MasterJob implements Callable<ReduceType> {
|
|
||||||
final Iterator<InputType> inputReader;
|
|
||||||
final NSMapFunction<InputType, MapType> map;
|
|
||||||
final ReduceType initialValue;
|
|
||||||
final NSReduceFunction<MapType, ReduceType> reduce;
|
|
||||||
|
|
||||||
private MasterJob(Iterator<InputType> inputReader, NSMapFunction<InputType, MapType> map, ReduceType initialValue, NSReduceFunction<MapType, ReduceType> reduce) {
|
|
||||||
this.inputReader = inputReader;
|
|
||||||
this.map = map;
|
|
||||||
this.initialValue = initialValue;
|
|
||||||
this.reduce = reduce;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ReduceType call() {
|
|
||||||
// a blocking queue that limits the number of input datum to the requested buffer size
|
// a blocking queue that limits the number of input datum to the requested buffer size
|
||||||
// note we need +1 because we continue to enqueue the lastObject
|
// note we need +1 because we continue to enqueue the lastObject
|
||||||
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
|
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
|
||||||
|
|
@ -407,10 +350,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
reducer.setTotalJobCount(nSubmittedJobs);
|
reducer.setTotalJobCount(nSubmittedJobs);
|
||||||
|
|
||||||
// wait for all of the input and map threads to finish
|
// wait for all of the input and map threads to finish
|
||||||
return waitForCompletion(inputProducer, reducer);
|
return waitForCompletion(inputProducer, reducer, errorWatcher);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
errorTracker.notifyOfError(ex);
|
// occurs in general because the error watching thread shut us down
|
||||||
return initialValue;
|
throw errorTracker.notifyOfError(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -418,7 +361,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
* Wait until the input thread and all map threads have completed running, and return the final reduce result
|
* Wait until the input thread and all map threads have completed running, and return the final reduce result
|
||||||
*/
|
*/
|
||||||
private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer,
|
private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer,
|
||||||
final Reducer<MapType, ReduceType> reducer) throws InterruptedException {
|
final Reducer<MapType, ReduceType> reducer,
|
||||||
|
final ErrorWatcherThread errorWatcher) throws InterruptedException {
|
||||||
// wait until we have a final reduce result
|
// wait until we have a final reduce result
|
||||||
// logger.warn("waiting for final reduce");
|
// logger.warn("waiting for final reduce");
|
||||||
final ReduceType finalSum = reducer.waitForFinalReduce();
|
final ReduceType finalSum = reducer.waitForFinalReduce();
|
||||||
|
|
@ -432,6 +376,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
runningMapJobSlots.acquire(bufferSize);
|
runningMapJobSlots.acquire(bufferSize);
|
||||||
runningMapJobSlots.release(bufferSize);
|
runningMapJobSlots.release(bufferSize);
|
||||||
|
|
||||||
|
// We are done with everything so shutdown the errorWatcher thread
|
||||||
|
errorWatcher.shutdown();
|
||||||
|
|
||||||
// everything is finally shutdown, return the final reduce value
|
// everything is finally shutdown, return the final reduce value
|
||||||
return finalSum;
|
return finalSum;
|
||||||
}
|
}
|
||||||
|
|
@ -453,8 +400,67 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final int nReadItems = inputProducer.getNumInputValues();
|
final int nReadItems = inputProducer.getNumInputValues();
|
||||||
return nReadItems == -1 || nJobsSubmitted < nReadItems;
|
return nReadItems == -1 || nJobsSubmitted < nReadItems;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A thread that periodically wakes up and checks to see if an error has occurred, and if
|
||||||
|
* so shuts down the NanoScheduler (via shutdownNow()), sending an InterruptedException to
|
||||||
|
* the main thread, which throws the error in the errorTracker.
|
||||||
|
*
|
||||||
|
* The main thread should call shutdown() when it is ready to return, which will cause
|
||||||
|
* the run() method of this thread to abort in the next iteration. Uses a local latch to
|
||||||
|
* cause the thread calling shutdown to block until the run() method exits.
|
||||||
|
*/
|
||||||
|
private class ErrorWatcherThread implements Runnable {
|
||||||
|
boolean done = false;
|
||||||
|
final CountDownLatch latch = new CountDownLatch(1);
|
||||||
|
|
||||||
|
private boolean isDone() {
|
||||||
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shutdown this ErrorWatcher, blocking until the run() method of this thread exits
|
||||||
|
*
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
public void shutdown() throws InterruptedException {
|
||||||
|
this.done = true;
|
||||||
|
latch.await();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
while ( ! isDone() ) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(100);
|
||||||
|
handleErrorsIfOneOccurred();
|
||||||
|
} catch (final InterruptedException ex) {
|
||||||
|
break; // just exit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// release the latch so the thread blocked in shutdown() can proceed
|
||||||
|
latch.countDown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If an error has occurred in the tracker, shut down the executors and
|
||||||
|
* throw the error; otherwise do nothing.
|
||||||
|
*/
|
||||||
|
private void handleErrorsIfOneOccurred() {
|
||||||
|
if ( errorTracker.hasAnErrorOccurred() ) {
|
||||||
|
mapExecutor.shutdownNow();
|
||||||
|
inputExecutor.shutdownNow();
|
||||||
|
errorWatchingExecutor.shutdownNow();
|
||||||
|
errorTracker.throwErrorIfPending();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes a single map job, reading the next element from the input inputQueue
|
||||||
|
* and after mapping runs reduce on as many elements as possible
|
||||||
|
*/
|
||||||
private class MapReduceJob implements Runnable {
|
private class MapReduceJob implements Runnable {
|
||||||
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue;
|
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue;
|
||||||
final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue;
|
final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue;
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class NanoSchedulerUnitTest extends BaseTest {
|
public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
private final static boolean debug = false;
|
private final static boolean debug = false;
|
||||||
public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000;
|
public static final int NANO_SCHEDULE_MAX_RUNTIME = 10000;
|
||||||
|
|
||||||
private static class Map2x implements NSMapFunction<Integer, Integer> {
|
private static class Map2x implements NSMapFunction<Integer, Integer> {
|
||||||
@Override public Integer apply(Integer input) { return input * 2; }
|
@Override public Integer apply(Integer input) { return input * 2; }
|
||||||
|
|
@ -228,12 +228,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
|
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000)
|
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000, invocationCount = 50)
|
||||||
public void testInputErrorIsThrown_NPE() throws InterruptedException {
|
public void testInputErrorIsThrown_NPE() throws InterruptedException {
|
||||||
executeTestErrorThrowingInput(new NullPointerException());
|
executeTestErrorThrowingInput(new NullPointerException());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000)
|
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000, invocationCount = 50)
|
||||||
public void testInputErrorIsThrown_RSE() throws InterruptedException {
|
public void testInputErrorIsThrown_RSE() throws InterruptedException {
|
||||||
executeTestErrorThrowingInput(new ReviewedStingException("test"));
|
executeTestErrorThrowingInput(new ReviewedStingException("test"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue