Reorganized NanoScheduler so that main thread does the reduces

-- Enables us to run -nt 2 -nct 2 and get meaningful output
-- Uses a sleep / poll mechanism.  Not ideal -- will look into wait / notify instead.
This commit is contained in:
Mark DePristo 2012-09-20 17:07:49 -04:00
parent 747694f7c2
commit 7425ab9637
2 changed files with 138 additions and 132 deletions

View File

@ -48,7 +48,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
final int bufferSize; final int bufferSize;
final int nThreads; final int nThreads;
final ExecutorService inputExecutor; final ExecutorService inputExecutor;
final ExecutorService masterExecutor; final ExecutorService errorWatchingExecutor;
final ExecutorService mapExecutor; final ExecutorService mapExecutor;
final Semaphore runningMapJobSlots; final Semaphore runningMapJobSlots;
final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker(); final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker();
@ -85,14 +85,14 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
this.nThreads = nThreads; this.nThreads = nThreads;
if ( nThreads == 1 ) { if ( nThreads == 1 ) {
this.mapExecutor = this.inputExecutor = this.masterExecutor = null; this.mapExecutor = this.inputExecutor = this.errorWatchingExecutor = null;
runningMapJobSlots = null; runningMapJobSlots = null;
} else { } else {
this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d")); this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d"));
runningMapJobSlots = new Semaphore(this.bufferSize); runningMapJobSlots = new Semaphore(this.bufferSize);
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d")); this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d")); this.errorWatchingExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
} }
// start timing the time spent outside of the nanoScheduler // start timing the time spent outside of the nanoScheduler
@ -131,7 +131,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
if ( nThreads > 1 ) { if ( nThreads > 1 ) {
shutdownExecutor("inputExecutor", inputExecutor); shutdownExecutor("inputExecutor", inputExecutor);
shutdownExecutor("mapExecutor", mapExecutor); shutdownExecutor("mapExecutor", mapExecutor);
shutdownExecutor("masterExecutor", masterExecutor); shutdownExecutor("errorWatchingExecutor", errorWatchingExecutor);
} }
shutdown = true; shutdown = true;
@ -313,66 +313,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
final NSReduceFunction<MapType, ReduceType> reduce) { final NSReduceFunction<MapType, ReduceType> reduce) {
debugPrint("Executing nanoScheduler"); debugPrint("Executing nanoScheduler");
// start up the master job final ErrorWatcherThread errorWatcher = new ErrorWatcherThread();
final MasterJob masterJob = new MasterJob(inputReader, map, initialValue, reduce); errorWatchingExecutor.submit(errorWatcher);
final Future<ReduceType> reduceResult = masterExecutor.submit(masterJob);
while ( true ) {
// check that no errors occurred while we were waiting
handleErrors();
try {
final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
// in case an error occurred in the reduce
handleErrors();
// return our final reduce result
return result;
} catch (final TimeoutException ex ) {
// a normal case -- we just aren't done
} catch (final InterruptedException ex) {
errorTracker.notifyOfError(ex);
// will handle error in the next round of the for loop
} catch (final ExecutionException ex) {
errorTracker.notifyOfError(ex);
// will handle error in the next round of the for loop
}
}
}
/**
 * If the errorTracker reports that an error has occurred, immediately shuts down the
 * master, map, and input executors (via shutdownNow()) and then calls
 * errorTracker.throwErrorIfPending(); otherwise does nothing.
 *
 * NOTE(review): throwErrorIfPending() presumably rethrows the recorded error to the
 * caller -- its implementation is not visible here, confirm against MultiThreadedErrorTracker.
 */
private void handleErrors() {
if ( errorTracker.hasAnErrorOccurred() ) {
// stop every executor first so no worker keeps running once the error is raised
masterExecutor.shutdownNow();
mapExecutor.shutdownNow();
inputExecutor.shutdownNow();
errorTracker.throwErrorIfPending();
}
}
/**
* MasterJob has the task to enqueue Map jobs and wait for the final reduce
*
* It must be run in a separate thread in order to properly handle errors that may occur
* in the input, map, or reduce jobs without deadlocking.
*
* The result of this callable is the final reduce value for the input / map / reduce jobs
*/
private class MasterJob implements Callable<ReduceType> {
final Iterator<InputType> inputReader;
final NSMapFunction<InputType, MapType> map;
final ReduceType initialValue;
final NSReduceFunction<MapType, ReduceType> reduce;
private MasterJob(Iterator<InputType> inputReader, NSMapFunction<InputType, MapType> map, ReduceType initialValue, NSReduceFunction<MapType, ReduceType> reduce) {
this.inputReader = inputReader;
this.map = map;
this.initialValue = initialValue;
this.reduce = reduce;
}
@Override
public ReduceType call() {
// a blocking queue that limits the number of input datum to the requested buffer size // a blocking queue that limits the number of input datum to the requested buffer size
// note we need +1 because we continue to enqueue the lastObject // note we need +1 because we continue to enqueue the lastObject
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
@ -407,10 +350,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
reducer.setTotalJobCount(nSubmittedJobs); reducer.setTotalJobCount(nSubmittedJobs);
// wait for all of the input and map threads to finish // wait for all of the input and map threads to finish
return waitForCompletion(inputProducer, reducer); return waitForCompletion(inputProducer, reducer, errorWatcher);
} catch (Exception ex) { } catch (Exception ex) {
errorTracker.notifyOfError(ex); // occurs in general because the error watching thread shut us down
return initialValue; throw errorTracker.notifyOfError(ex);
} }
} }
@ -418,7 +361,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
* Wait until the input thread and all map threads have completed running, and return the final reduce result * Wait until the input thread and all map threads have completed running, and return the final reduce result
*/ */
private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer, private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer,
final Reducer<MapType, ReduceType> reducer) throws InterruptedException { final Reducer<MapType, ReduceType> reducer,
final ErrorWatcherThread errorWatcher) throws InterruptedException {
// wait until we have a final reduce result // wait until we have a final reduce result
// logger.warn("waiting for final reduce"); // logger.warn("waiting for final reduce");
final ReduceType finalSum = reducer.waitForFinalReduce(); final ReduceType finalSum = reducer.waitForFinalReduce();
@ -432,6 +376,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
runningMapJobSlots.acquire(bufferSize); runningMapJobSlots.acquire(bufferSize);
runningMapJobSlots.release(bufferSize); runningMapJobSlots.release(bufferSize);
// We are done with everything so shutdown the errorWatcher thread
errorWatcher.shutdown();
// everything is finally shutdown, return the final reduce value // everything is finally shutdown, return the final reduce value
return finalSum; return finalSum;
} }
@ -453,8 +400,67 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
final int nReadItems = inputProducer.getNumInputValues(); final int nReadItems = inputProducer.getNumInputValues();
return nReadItems == -1 || nJobsSubmitted < nReadItems; return nReadItems == -1 || nJobsSubmitted < nReadItems;
} }
/**
* A thread that periodically wakes up and checks to see if an error has occurred, and if
* so shuts down the NanoScheduler (via shutdownNow()), sending an InterruptedException to
* the main thread, which throws the error in the errorTracker.
*
* The main thread should call shutdown() when its ready to return itself, which will cause
* the run() method of this thread to abort in the next iteration. Uses a local latch to
* cause the thread calling shutdown to block until the run() method exits.
*/
private class ErrorWatcherThread implements Runnable {
boolean done = false;
final CountDownLatch latch = new CountDownLatch(1);
private boolean isDone() {
return done;
} }
/**
* Shutdown this ErrorWatcher, blocking until the run() method of this thread exits
*
* @throws InterruptedException
*/
public void shutdown() throws InterruptedException {
this.done = true;
latch.await();
}
@Override
public void run() {
while ( ! isDone() ) {
try {
Thread.sleep(100);
handleErrorsIfOneOccurred();
} catch (final InterruptedException ex) {
break; // just exit
}
}
// free the latch so the shutdown thread starts up
latch.countDown();
}
}
/**
 * Checks the errorTracker and, when it reports that an error has occurred, shuts down
 * the map, input, and error-watching executors (via shutdownNow()) and then asks the
 * tracker to throw the pending error.  Does nothing when no error has been recorded.
 */
private void handleErrorsIfOneOccurred() {
    final boolean errorSeen = errorTracker.hasAnErrorOccurred();
    if ( ! errorSeen ) {
        return;
    }

    // stop all worker pools before surfacing the error
    mapExecutor.shutdownNow();
    inputExecutor.shutdownNow();
    errorWatchingExecutor.shutdownNow();

    errorTracker.throwErrorIfPending();
}
/**
* Executes a single map job, reading the next element from the inputQueue
* and, after mapping, running reduce on as many elements as possible
*/
private class MapReduceJob implements Runnable { private class MapReduceJob implements Runnable {
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue; final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue;
final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue; final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue;

View File

@ -24,7 +24,7 @@ import java.util.List;
*/ */
public class NanoSchedulerUnitTest extends BaseTest { public class NanoSchedulerUnitTest extends BaseTest {
private final static boolean debug = false; private final static boolean debug = false;
public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000; public static final int NANO_SCHEDULE_MAX_RUNTIME = 10000;
private static class Map2x implements NSMapFunction<Integer, Integer> { private static class Map2x implements NSMapFunction<Integer, Integer> {
@Override public Integer apply(Integer input) { return input * 2; } @Override public Integer apply(Integer input) { return input * 2; }
@ -228,12 +228,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce()); nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
} }
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000) @Test(expectedExceptions = NullPointerException.class, timeOut = 10000, invocationCount = 50)
public void testInputErrorIsThrown_NPE() throws InterruptedException { public void testInputErrorIsThrown_NPE() throws InterruptedException {
executeTestErrorThrowingInput(new NullPointerException()); executeTestErrorThrowingInput(new NullPointerException());
} }
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000) @Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000, invocationCount = 50)
public void testInputErrorIsThrown_RSE() throws InterruptedException { public void testInputErrorIsThrown_RSE() throws InterruptedException {
executeTestErrorThrowingInput(new ReviewedStingException("test")); executeTestErrorThrowingInput(new ReviewedStingException("test"));
} }