Revert "Reorganized NanoScheduler so that main thread does the reduces"
The reorganization doesn't actually fix the problem, and it adds an unnecessary delay when shutting down NanoScheduler, so it is being reverted. This reverts commit 66b820bf94ae755a8a0c71ea16f4cae56fd3e852.
This commit is contained in:
parent
7425ab9637
commit
ba9e95a8fe
|
|
@ -48,7 +48,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final int bufferSize;
|
final int bufferSize;
|
||||||
final int nThreads;
|
final int nThreads;
|
||||||
final ExecutorService inputExecutor;
|
final ExecutorService inputExecutor;
|
||||||
final ExecutorService errorWatchingExecutor;
|
final ExecutorService masterExecutor;
|
||||||
final ExecutorService mapExecutor;
|
final ExecutorService mapExecutor;
|
||||||
final Semaphore runningMapJobSlots;
|
final Semaphore runningMapJobSlots;
|
||||||
final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker();
|
final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker();
|
||||||
|
|
@ -85,14 +85,14 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
this.nThreads = nThreads;
|
this.nThreads = nThreads;
|
||||||
|
|
||||||
if ( nThreads == 1 ) {
|
if ( nThreads == 1 ) {
|
||||||
this.mapExecutor = this.inputExecutor = this.errorWatchingExecutor = null;
|
this.mapExecutor = this.inputExecutor = this.masterExecutor = null;
|
||||||
runningMapJobSlots = null;
|
runningMapJobSlots = null;
|
||||||
} else {
|
} else {
|
||||||
this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d"));
|
this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d"));
|
||||||
runningMapJobSlots = new Semaphore(this.bufferSize);
|
runningMapJobSlots = new Semaphore(this.bufferSize);
|
||||||
|
|
||||||
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
||||||
this.errorWatchingExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// start timing the time spent outside of the nanoScheduler
|
// start timing the time spent outside of the nanoScheduler
|
||||||
|
|
@ -131,7 +131,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
if ( nThreads > 1 ) {
|
if ( nThreads > 1 ) {
|
||||||
shutdownExecutor("inputExecutor", inputExecutor);
|
shutdownExecutor("inputExecutor", inputExecutor);
|
||||||
shutdownExecutor("mapExecutor", mapExecutor);
|
shutdownExecutor("mapExecutor", mapExecutor);
|
||||||
shutdownExecutor("errorWatchingExecutor", errorWatchingExecutor);
|
shutdownExecutor("masterExecutor", masterExecutor);
|
||||||
}
|
}
|
||||||
|
|
||||||
shutdown = true;
|
shutdown = true;
|
||||||
|
|
@ -313,9 +313,66 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final NSReduceFunction<MapType, ReduceType> reduce) {
|
final NSReduceFunction<MapType, ReduceType> reduce) {
|
||||||
debugPrint("Executing nanoScheduler");
|
debugPrint("Executing nanoScheduler");
|
||||||
|
|
||||||
final ErrorWatcherThread errorWatcher = new ErrorWatcherThread();
|
// start up the master job
|
||||||
errorWatchingExecutor.submit(errorWatcher);
|
final MasterJob masterJob = new MasterJob(inputReader, map, initialValue, reduce);
|
||||||
|
final Future<ReduceType> reduceResult = masterExecutor.submit(masterJob);
|
||||||
|
|
||||||
|
while ( true ) {
|
||||||
|
// check that no errors occurred while we were waiting
|
||||||
|
handleErrors();
|
||||||
|
|
||||||
|
try {
|
||||||
|
final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
// in case an error occurred in the reduce
|
||||||
|
handleErrors();
|
||||||
|
|
||||||
|
// return our final reduce result
|
||||||
|
return result;
|
||||||
|
} catch (final TimeoutException ex ) {
|
||||||
|
// a normal case -- we just aren't done
|
||||||
|
} catch (final InterruptedException ex) {
|
||||||
|
errorTracker.notifyOfError(ex);
|
||||||
|
// the error will be handled in the next iteration of the while loop
|
||||||
|
} catch (final ExecutionException ex) {
|
||||||
|
errorTracker.notifyOfError(ex);
|
||||||
|
// the error will be handled in the next iteration of the while loop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleErrors() {
|
||||||
|
if ( errorTracker.hasAnErrorOccurred() ) {
|
||||||
|
masterExecutor.shutdownNow();
|
||||||
|
mapExecutor.shutdownNow();
|
||||||
|
inputExecutor.shutdownNow();
|
||||||
|
errorTracker.throwErrorIfPending();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MasterJob is responsible for enqueueing map jobs and waiting for the final reduce
|
||||||
|
*
|
||||||
|
* It must be run in a separate thread in order to properly handle errors that may occur
|
||||||
|
* in the input, map, or reduce jobs without deadlocking.
|
||||||
|
*
|
||||||
|
* The result of this callable is the final reduce value for the input / map / reduce jobs
|
||||||
|
*/
|
||||||
|
private class MasterJob implements Callable<ReduceType> {
|
||||||
|
final Iterator<InputType> inputReader;
|
||||||
|
final NSMapFunction<InputType, MapType> map;
|
||||||
|
final ReduceType initialValue;
|
||||||
|
final NSReduceFunction<MapType, ReduceType> reduce;
|
||||||
|
|
||||||
|
private MasterJob(Iterator<InputType> inputReader, NSMapFunction<InputType, MapType> map, ReduceType initialValue, NSReduceFunction<MapType, ReduceType> reduce) {
|
||||||
|
this.inputReader = inputReader;
|
||||||
|
this.map = map;
|
||||||
|
this.initialValue = initialValue;
|
||||||
|
this.reduce = reduce;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ReduceType call() {
|
||||||
// a blocking queue that limits the number of input datum to the requested buffer size
|
// a blocking queue that limits the number of input datum to the requested buffer size
|
||||||
// note we need +1 because we continue to enqueue the lastObject
|
// note we need +1 because we continue to enqueue the lastObject
|
||||||
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
|
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
|
||||||
|
|
@ -350,10 +407,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
reducer.setTotalJobCount(nSubmittedJobs);
|
reducer.setTotalJobCount(nSubmittedJobs);
|
||||||
|
|
||||||
// wait for all of the input and map threads to finish
|
// wait for all of the input and map threads to finish
|
||||||
return waitForCompletion(inputProducer, reducer, errorWatcher);
|
return waitForCompletion(inputProducer, reducer);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
// occurs in general because the error watching thread shut us down
|
errorTracker.notifyOfError(ex);
|
||||||
throw errorTracker.notifyOfError(ex);
|
return initialValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -361,8 +418,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
* Wait until the input thread and all map threads have completed running, and return the final reduce result
|
* Wait until the input thread and all map threads have completed running, and return the final reduce result
|
||||||
*/
|
*/
|
||||||
private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer,
|
private ReduceType waitForCompletion(final InputProducer<InputType> inputProducer,
|
||||||
final Reducer<MapType, ReduceType> reducer,
|
final Reducer<MapType, ReduceType> reducer) throws InterruptedException {
|
||||||
final ErrorWatcherThread errorWatcher) throws InterruptedException {
|
|
||||||
// wait until we have a final reduce result
|
// wait until we have a final reduce result
|
||||||
// logger.warn("waiting for final reduce");
|
// logger.warn("waiting for final reduce");
|
||||||
final ReduceType finalSum = reducer.waitForFinalReduce();
|
final ReduceType finalSum = reducer.waitForFinalReduce();
|
||||||
|
|
@ -376,9 +432,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
runningMapJobSlots.acquire(bufferSize);
|
runningMapJobSlots.acquire(bufferSize);
|
||||||
runningMapJobSlots.release(bufferSize);
|
runningMapJobSlots.release(bufferSize);
|
||||||
|
|
||||||
// We are done with everything so shutdown the errorWatcher thread
|
|
||||||
errorWatcher.shutdown();
|
|
||||||
|
|
||||||
// everything is finally shutdown, return the final reduce value
|
// everything is finally shutdown, return the final reduce value
|
||||||
return finalSum;
|
return finalSum;
|
||||||
}
|
}
|
||||||
|
|
@ -400,67 +453,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||||
final int nReadItems = inputProducer.getNumInputValues();
|
final int nReadItems = inputProducer.getNumInputValues();
|
||||||
return nReadItems == -1 || nJobsSubmitted < nReadItems;
|
return nReadItems == -1 || nJobsSubmitted < nReadItems;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* A thread that periodically wakes up and checks to see if an error has occurred, and if
|
|
||||||
* so shuts down the NanoScheduler (via shutdownNow()), sending an InterruptedException to
|
|
||||||
* the main thread, which throws the error in the errorTracker.
|
|
||||||
*
|
|
||||||
* The main thread should call shutdown() when it's ready to return itself, which will cause
|
|
||||||
* the run() method of this thread to abort in the next iteration. Uses a local latch to
|
|
||||||
* cause the thread calling shutdown to block until the run() method exits.
|
|
||||||
*/
|
|
||||||
private class ErrorWatcherThread implements Runnable {
|
|
||||||
boolean done = false;
|
|
||||||
final CountDownLatch latch = new CountDownLatch(1);
|
|
||||||
|
|
||||||
private boolean isDone() {
|
|
||||||
return done;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Shutdown this ErrorWatcher, blocking until the run() method of this thread exits
|
|
||||||
*
|
|
||||||
* @throws InterruptedException
|
|
||||||
*/
|
|
||||||
public void shutdown() throws InterruptedException {
|
|
||||||
this.done = true;
|
|
||||||
latch.await();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
while ( ! isDone() ) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(100);
|
|
||||||
handleErrorsIfOneOccurred();
|
|
||||||
} catch (final InterruptedException ex) {
|
|
||||||
break; // just exit
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// free the latch so the shutdown thread starts up
|
|
||||||
latch.countDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If an error has occurred in the tracker, shut down the executors and
|
|
||||||
* throw the error, otherwise do nothing.
|
|
||||||
*/
|
|
||||||
private void handleErrorsIfOneOccurred() {
|
|
||||||
if ( errorTracker.hasAnErrorOccurred() ) {
|
|
||||||
mapExecutor.shutdownNow();
|
|
||||||
inputExecutor.shutdownNow();
|
|
||||||
errorWatchingExecutor.shutdownNow();
|
|
||||||
errorTracker.throwErrorIfPending();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes a single map job, reading the next element from the input inputQueue
|
|
||||||
* and after mapping runs reduce on as many elements as possible
|
|
||||||
*/
|
|
||||||
private class MapReduceJob implements Runnable {
|
private class MapReduceJob implements Runnable {
|
||||||
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue;
|
final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue;
|
||||||
final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue;
|
final PriorityBlockingQueue<MapResult<MapType>> mapResultQueue;
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class NanoSchedulerUnitTest extends BaseTest {
|
public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
private final static boolean debug = false;
|
private final static boolean debug = false;
|
||||||
public static final int NANO_SCHEDULE_MAX_RUNTIME = 10000;
|
public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000;
|
||||||
|
|
||||||
private static class Map2x implements NSMapFunction<Integer, Integer> {
|
private static class Map2x implements NSMapFunction<Integer, Integer> {
|
||||||
@Override public Integer apply(Integer input) { return input * 2; }
|
@Override public Integer apply(Integer input) { return input * 2; }
|
||||||
|
|
@ -228,12 +228,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
|
||||||
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
|
nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000, invocationCount = 50)
|
@Test(expectedExceptions = NullPointerException.class, timeOut = 10000)
|
||||||
public void testInputErrorIsThrown_NPE() throws InterruptedException {
|
public void testInputErrorIsThrown_NPE() throws InterruptedException {
|
||||||
executeTestErrorThrowingInput(new NullPointerException());
|
executeTestErrorThrowingInput(new NullPointerException());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000, invocationCount = 50)
|
@Test(expectedExceptions = ReviewedStingException.class, timeOut = 10000)
|
||||||
public void testInputErrorIsThrown_RSE() throws InterruptedException {
|
public void testInputErrorIsThrown_RSE() throws InterruptedException {
|
||||||
executeTestErrorThrowingInput(new ReviewedStingException("test"));
|
executeTestErrorThrowingInput(new ReviewedStingException("test"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue