GSA-507: Thread monitoring refactored so it can work without a thread factory

-- Old version StateMonitoringThreadFactory refactored into base class ThreadEfficiencyMonitor and subclass EfficiencyMonitoringThreadFactory.
-- Base class is used by LinearMicroScheduler to monitor performance of GATK in single threaded mode
-- MicroScheduler now handles management of the efficiency monitor.  Includes master thread in monitor, meaning that reduce is now included for both schedulers
This commit is contained in:
Mark DePristo 2012-08-22 11:31:14 -04:00
parent f876c51277
commit e1293f0ef2
7 changed files with 431 additions and 356 deletions

View File

@ -11,7 +11,7 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.threading.StateMonitoringThreadFactory;
import org.broadinstitute.sting.utils.threading.EfficiencyMonitoringThreadFactory;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import java.util.Collection;
@ -73,9 +73,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/** What is the total time spent merging output? */
private long totalOutputMergeTime = 0;
/** may be null */
final StateMonitoringThreadFactory monitoringThreadFactory;
/**
* Create a new hierarchical microscheduler to process the given reads and reference.
*
@ -94,10 +91,10 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
super(engine, walker, reads, reference, rods);
if ( monitorThreadPerformance ) {
this.monitoringThreadFactory = new StateMonitoringThreadFactory(nThreadsToUse);
final EfficiencyMonitoringThreadFactory monitoringThreadFactory = new EfficiencyMonitoringThreadFactory(nThreadsToUse);
setThreadEfficiencyMonitor(monitoringThreadFactory);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse, monitoringThreadFactory);
} else {
this.monitoringThreadFactory = null;
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
}
}
@ -157,19 +154,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
// do final cleanup operations
outputTracker.close();
cleanup();
printThreadingEfficiency();
executionIsDone();
return result;
}
/**
 * Report this scheduler's threading efficiency to the logger.
 *
 * Does nothing when no monitoring thread factory was installed.
 */
private void printThreadingEfficiency() {
    if ( monitoringThreadFactory == null ) {
        return;
    }

    monitoringThreadFactory.printUsageInformation(logger);
}
/**
* Run the initialize method of the walker. Ensure that any calls
* to the output stream will bypass thread local storage and write

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import java.util.Collection;
@ -33,8 +34,17 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param reference Reference for driving the traversal.
* @param rods Reference-ordered data.
*/
protected LinearMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods ) {
protected LinearMicroScheduler(final GenomeAnalysisEngine engine,
final Walker walker,
final SAMDataSource reads,
final IndexedFastaSequenceFile reference,
final Collection<ReferenceOrderedDataSource> rods,
final boolean monitorThreadPerformance ) {
super(engine, walker, reads, reference, rods);
if ( monitorThreadPerformance )
setThreadEfficiencyMonitor(new ThreadEfficiencyMonitor());
}
/**
@ -88,6 +98,7 @@ public class LinearMicroScheduler extends MicroScheduler {
outputTracker.close();
cleanup();
executionIsDone();
return accumulator;
}

View File

@ -39,6 +39,8 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.EfficiencyMonitoringThreadFactory;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import javax.management.JMException;
import javax.management.MBeanServer;
@ -79,6 +81,13 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
private final MBeanServer mBeanServer;
private final ObjectName mBeanName;
/**
* Threading efficiency monitor for tracking the resource utilization of the GATK
*
* may be null
*/
ThreadEfficiencyMonitor threadEfficiencyMonitor = null;
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
* selected walker.
@ -102,7 +111,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
} else {
if(threadAllocation.getNumCPUThreads() > 1)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
return new LinearMicroScheduler(engine, walker, reads, reference, rods);
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.shouldMonitorThreads());
}
}
@ -150,6 +159,16 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
}
}
/**
* Install the efficiency monitor this MicroScheduler reports on when execution finishes.
*
* The value is stored as-is in the threadEfficiencyMonitor field; executionIsDone() skips
* all reporting when that field is null.
*
* @param threadEfficiencyMonitor the monitor to store; the field it is assigned to is documented as "may be null"
*/
public void setThreadEfficiencyMonitor(final ThreadEfficiencyMonitor threadEfficiencyMonitor) {
this.threadEfficiencyMonitor = threadEfficiencyMonitor;
}
/**
* Walks a walker over the given list of intervals.
*
@ -183,6 +202,18 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
traversalEngine.printOnTraversalDone();
}
/**
 * Must be called by subclasses when execute is done.
 *
 * When an efficiency monitor is installed, the calling thread is reported to it
 * and the accumulated usage information is written to the logger.
 */
protected void executionIsDone() {
    // nothing to report when efficiency monitoring is disabled
    if ( threadEfficiencyMonitor == null ) {
        return;
    }

    // fold the thread running this method (the master thread) into the totals before printing
    final Thread masterThread = Thread.currentThread();
    threadEfficiencyMonitor.threadIsDone(masterThread);
    threadEfficiencyMonitor.printUsageInformation(logger);
}
/**
* Gets the engine that created this microscheduler.
* @return The engine owning this microscheduler.

View File

@ -0,0 +1,159 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.threading;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
/**
 * Creates threads that automatically monitor their efficiency via the parent ThreadEfficiencyMonitor
 *
 * Every thread handed out by newThread() reports itself through threadIsDone() once its
 * runnable has finished, whether it returned normally or threw.  Threads that were not
 * created by this factory (for example the master thread reported by a scheduler) may also
 * be passed to threadIsDone(): their timing is recorded, but they do not touch the list of
 * active threads or the countdown latch.
 *
 * User: depristo
 * Date: 8/14/12
 * Time: 8:47 AM
 */
@Invariant({
        "activeThreads.size() <= nThreadsToCreate",
        "countDownLatch.getCount() <= nThreadsToCreate",
        "nThreadsCreated <= nThreadsToCreate"
})
public class EfficiencyMonitoringThreadFactory extends ThreadEfficiencyMonitor implements ThreadFactory {
    /** Upper bound on the number of simultaneously active threads this factory will hand out */
    final int nThreadsToCreate;

    /** Threads created by this factory that have not yet reported through threadIsDone() */
    final List<Thread> activeThreads;

    /** Total number of threads created by this factory so far */
    int nThreadsCreated = 0;

    /**
     * Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
     * times.  Counts down from nThreadsToCreate to 0, at which point any code waiting
     * on the final times is freed to run.
     */
    final CountDownLatch countDownLatch;

    /**
     * Create a new factory generating threads whose runtime and contention
     * behavior is tracked in this factory.
     *
     * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
     * @throws IllegalArgumentException if nThreadsToCreate is not strictly positive
     */
    public EfficiencyMonitoringThreadFactory(final int nThreadsToCreate) {
        super();
        if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);

        this.nThreadsToCreate = nThreadsToCreate;
        activeThreads = new ArrayList<Thread>(nThreadsToCreate);
        countDownLatch = new CountDownLatch(nThreadsToCreate);
    }

    /**
     * How many threads have been created by this factory so far?
     *
     * @return the number of calls to newThread() that produced a thread
     */
    @Ensures("result >= 0")
    public int getNThreadsCreated() {
        return nThreadsCreated;
    }

    /**
     * Only useful for testing, so that we can wait for all of the threads in the factory to complete running
     *
     * @throws InterruptedException if the waiting thread is interrupted before the latch reaches zero
     */
    protected void waitForAllThreadsToComplete() throws InterruptedException {
        countDownLatch.await();
    }

    /**
     * Record the timing of thread and, if it was created by this factory, retire it.
     *
     * The superclass does the bookkeeping (including counting the thread as analyzed), so
     * this override must not count it a second time.  Threads unknown to the factory are
     * accepted: only their timing is recorded, and the latch is left alone so that it keeps
     * tracking factory-created threads exclusively.
     *
     * @param thread the thread whose information we are updating
     */
    @Ensures({
            "! activeThreads.contains(thread)",
            "countDownLatch.getCount() <= old(countDownLatch.getCount())"
    })
    @Override
    public synchronized void threadIsDone(final Thread thread) {
        if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());

        super.threadIsDone(thread);

        // only threads handed out by newThread() are in the active list and owe the latch a count
        if ( activeThreads.remove(thread) ) {
            // one less thread is live for those blocking on all activeThreads to be complete
            countDownLatch.countDown();
            if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
        }
    }

    /**
     * Create a new thread from this factory
     *
     * @param runnable the work the new thread will execute
     * @return a not-yet-started thread that reports to this monitor when runnable finishes
     * @throws IllegalStateException if nThreadsToCreate threads are already active
     */
    @Override
    @Ensures({
            "activeThreads.size() > old(activeThreads.size())",
            "activeThreads.contains(result)",
            "nThreadsCreated == old(nThreadsCreated) + 1"
    })
    public synchronized Thread newThread(final Runnable runnable) {
        if ( activeThreads.size() >= nThreadsToCreate)
            throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);

        nThreadsCreated++;
        final Thread myThread = new TrackingThread(runnable);
        activeThreads.add(myThread);
        return myThread;
    }

    /**
     * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
     */
    private class TrackingThread extends Thread {
        private TrackingThread(Runnable runnable) {
            super(runnable);
        }

        @Override
        public void run() {
            try {
                super.run();
            } finally {
                // report even when the runnable throws, otherwise the latch never reaches zero
                threadIsDone(this);
            }
        }
    }
}

View File

@ -1,321 +0,0 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.threading;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
/**
 * Create activeThreads, collecting statistics about their running state over time
 *
 * Uses a ThreadMXBean to capture info via ThreadInfo.  Each thread created here reports
 * its blocked, waited, user CPU and system CPU time to the factory when its runnable
 * finishes, whether it returned normally or threw.
 *
 * User: depristo
 * Date: 8/14/12
 * Time: 8:47 AM
 */
@Invariant({
        "activeThreads.size() <= nThreadsToCreate",
        "countDownLatch.getCount() <= nThreadsToCreate",
        "nThreadsCreated <= nThreadsToCreate"
})
public class StateMonitoringThreadFactory implements ThreadFactory {
    // off by default: when enabled every thread completion writes several WARN lines
    protected static final boolean DEBUG = false;
    private static Logger logger = Logger.getLogger(StateMonitoringThreadFactory.class);

    /** The categories that a thread's time is divided into */
    public enum State {
        BLOCKING("blocking on synchronized data structure"),
        WAITING("waiting on some other thread"),
        USER_CPU("doing productive CPU work"),
        WAITING_FOR_IO("waiting for I/O");

        private final String userFriendlyName;

        private State(String userFriendlyName) {
            this.userFriendlyName = userFriendlyName;
        }

        public String getUserFriendlyName() {
            return userFriendlyName;
        }
    }

    // todo -- it would be nice to not have to specify upfront the number of threads.
    // todo -- can we dynamically increment countDownLatch?  It seems not...
    final int nThreadsToCreate;
    final List<Thread> activeThreads;

    /** Accumulated time in milliseconds per state, summed over all finished threads */
    final EnumMap<State, Long> times = new EnumMap<State, Long>(State.class);

    int nThreadsCreated = 0;

    /**
     * The bean used to get the thread info about blocked and waiting times
     */
    final ThreadMXBean bean;

    /**
     * Counts down the number of active activeThreads whose runtime info hasn't been incorporated into
     * times.  Counts down from nThreadsToCreate to 0, at which point any code waiting
     * on the final times is freed to run.
     */
    final CountDownLatch countDownLatch;

    /**
     * Create a new factory generating threads whose runtime and contention
     * behavior is tracked in this factory.
     *
     * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete
     *                         // TODO -- remove argument when we figure out how to implement this capability
     * @throws IllegalArgumentException if nThreadsToCreate is not strictly positive
     */
    public StateMonitoringThreadFactory(final int nThreadsToCreate) {
        if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate);

        this.nThreadsToCreate = nThreadsToCreate;
        activeThreads = new ArrayList<Thread>(nThreadsToCreate);

        // initialize times to 0
        for ( final State state : State.values() )
            times.put(state, 0L);

        // get the bean, and start tracking
        bean = ManagementFactory.getThreadMXBean();
        if ( bean.isThreadContentionMonitoringSupported() )
            bean.setThreadContentionMonitoringEnabled(true);
        else
            logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");

        if ( bean.isThreadCpuTimeSupported() )
            bean.setThreadCpuTimeEnabled(true);
        else
            logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency");

        countDownLatch = new CountDownLatch(nThreadsToCreate);
    }

    /**
     * Get the time spent in state across all threads created by this factory
     *
     * @param state to get information about
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getStateTime(final State state) {
        return times.get(state);
    }

    /**
     * Get the total time spent in all states across all threads created by this factory
     *
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getTotalTime() {
        long total = 0;
        for ( final long time : times.values() )
            total += time;
        return total;
    }

    /**
     * Get the fraction of time spent in state across all threads created by this factory
     *
     * @param state to get information about
     * @return the fraction (0.0-1.0) of time spent in state over all state times of all threads
     */
    @Ensures({"result >= 0.0", "result <= 1.0"})
    public synchronized double getStateFraction(final State state) {
        // the denominator is floored at 1 so an empty factory reports 0.0 rather than NaN
        return getStateTime(state) / (1.0 * Math.max(getTotalTime(), 1));
    }

    /**
     * How many threads have been created by this factory so far?
     *
     * @return the number of calls to newThread() that produced a thread
     */
    @Ensures("result >= 0")
    public int getNThreadsCreated() {
        return nThreadsCreated;
    }

    /**
     * Only useful for testing, so that we can wait for all of the threads in the factory to complete running
     *
     * @throws InterruptedException if the waiting thread is interrupted before the latch reaches zero
     */
    protected void waitForAllThreadsToComplete() throws InterruptedException {
        countDownLatch.await();
    }

    @Override
    public synchronized String toString() {
        final StringBuilder b = new StringBuilder();

        b.append("total ").append(getTotalTime()).append(" ");
        for ( final State state : State.values() ) {
            b.append(state).append(" ").append(getStateTime(state)).append(" ");
        }

        return b.toString();
    }

    /**
     * Print usage information about threads from this factory to logger
     * with the INFO priority
     *
     * @param logger the logger to write to
     */
    public synchronized void printUsageInformation(final Logger logger) {
        printUsageInformation(logger, Priority.INFO);
    }

    /**
     * Print usage information about threads from this factory to logger
     * with the provided priority
     *
     * @param logger   the logger to write to
     * @param priority the priority used for every line written
     */
    public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
        logger.log(priority, "Number of activeThreads used: " + getNThreadsCreated());
        logger.log(priority, "Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
        for ( final State state : State.values() ) {
            logger.log(priority, String.format(" Fraction of time spent %s is %.2f (%s)",
                    state.getUserFriendlyName(),
                    getStateFraction(state),
                    new AutoFormattingTime(getStateTime(state) / 1000.0)));
        }
        logger.log(priority, String.format("CPU efficiency : %.2f%% of time spent doing productive work",
                getStateFraction(State.USER_CPU) * 100));
        logger.log(priority, String.format("I/O inefficiency: %.2f%% of time spent waiting on I/O",
                getStateFraction(State.WAITING_FOR_IO) * 100));
    }

    /**
     * Create a new thread from this factory
     *
     * @param runnable the work the new thread will execute
     * @return a not-yet-started thread that reports to this factory when runnable finishes
     * @throws IllegalStateException if nThreadsToCreate threads are already active
     */
    @Override
    @Ensures({
            "activeThreads.size() > old(activeThreads.size())",
            "activeThreads.contains(result)",
            "nThreadsCreated == old(nThreadsCreated) + 1"
    })
    public synchronized Thread newThread(final Runnable runnable) {
        if ( activeThreads.size() >= nThreadsToCreate)
            throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate);

        nThreadsCreated++;
        final Thread myThread = new TrackingThread(runnable);
        activeThreads.add(myThread);
        return myThread;
    }

    /**
     * Update the information about completed thread that ran for runtime in milliseconds
     *
     * This method updates all of the key timing and tracking information in the factory so that
     * thread can be retired.  After this call the factory shouldn't have a pointer to the thread any longer
     *
     * Metrics the JVM cannot provide are recorded as zero: the bean reports -1 when a
     * measurement is disabled, and throws when it is unsupported, neither of which may be
     * allowed to corrupt the totals or prevent the thread from being retired.
     *
     * @param thread the thread whose information we are updating
     * @throws IllegalStateException if thread is not one of this factory's active threads
     */
    @Ensures({
            "activeThreads.size() < old(activeThreads.size())",
            "! activeThreads.contains(thread)",
            "getTotalTime() >= old(getTotalTime())",
            "countDownLatch.getCount() < old(countDownLatch.getCount())"
    })
    private synchronized void threadIsDone(final Thread thread) {
        if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
        if ( DEBUG ) logger.warn("UpdateThreadInfo called");

        final long threadID = thread.getId();
        final ThreadInfo info = bean.getThreadInfo(threadID);

        if ( info != null ) {
            long userTimeNano = 0;
            long systemTimeNano = 0;
            if ( bean.isThreadCpuTimeSupported() ) {
                final long totalTimeNano = bean.getThreadCpuTime(threadID);
                userTimeNano = nonNegative(bean.getThreadUserTime(threadID));
                systemTimeNano = nonNegative(totalTimeNano - userTimeNano);
            }

            long blockedTime = 0;
            long waitedTime = 0;
            if ( bean.isThreadContentionMonitoringSupported() ) {
                blockedTime = nonNegative(info.getBlockedTime());
                waitedTime = nonNegative(info.getWaitedTime());
            }

            final long userTimeInMilliseconds = nanoToMilli(userTimeNano);
            final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano);

            if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + blockedTime + " and waiting " + waitedTime);
            incTimes(State.BLOCKING, blockedTime);
            incTimes(State.WAITING, waitedTime);
            incTimes(State.USER_CPU, userTimeInMilliseconds);
            incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds);
        }

        // remove the thread from the list of active activeThreads
        if ( ! activeThreads.remove(thread) )
            throw new IllegalStateException("Thread " + thread + " not in list of active activeThreads");

        // one less thread is live for those blocking on all activeThreads to be complete
        countDownLatch.countDown();
        if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName());
    }

    /**
     * Helper function that increments the times counter by by for state
     *
     * @param state the state whose counter is incremented
     * @param by    the non-negative number of milliseconds to add
     */
    @Requires({"state != null", "by >= 0"})
    @Ensures("getTotalTime() == old(getTotalTime()) + by")
    private synchronized void incTimes(final State state, final long by) {
        times.put(state, times.get(state) + by);
    }

    private static long nanoToMilli(final long timeInNano) {
        return TimeUnit.NANOSECONDS.toMillis(timeInNano);
    }

    /** Maps the "not available" sentinel (any negative value) reported by the bean to zero */
    private static long nonNegative(final long value) {
        return Math.max(value, 0L);
    }

    /**
     * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete
     */
    private class TrackingThread extends Thread {
        private TrackingThread(Runnable runnable) {
            super(runnable);
        }

        @Override
        public void run() {
            try {
                super.run();
            } finally {
                // report even when the runnable throws, otherwise the latch never reaches zero
                threadIsDone(this);
            }
        }
    }
}

View File

@ -0,0 +1,206 @@
package org.broadinstitute.sting.utils.threading;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.EnumMap;
import java.util.concurrent.TimeUnit;
/**
 * Uses an MXBean to monitor thread efficiency
 *
 * Once the monitor is created, calls to threadIsDone() can be used to add information
 * about the efficiency of the provided thread to this monitor.
 *
 * Provides simple print() for displaying efficiency information to a logger
 *
 * User: depristo
 * Date: 8/22/12
 * Time: 10:48 AM
 */
@Invariant({"nThreadsAnalyzed >= 0"})
public class ThreadEfficiencyMonitor {
    protected static final boolean DEBUG = false;
    protected static Logger logger = Logger.getLogger(ThreadEfficiencyMonitor.class);

    /** Accumulated time in milliseconds per state, summed over all threads passed to threadIsDone() */
    final EnumMap<State, Long> times = new EnumMap<State, Long>(State.class);

    /**
     * The number of threads we've included in our efficiency monitoring
     */
    int nThreadsAnalyzed = 0;

    /**
     * The bean used to get the thread info about blocked and waiting times
     */
    final ThreadMXBean bean;

    /**
     * Create a monitor with all state times at zero, enabling contention and CPU time
     * tracking on the JVM's thread bean where the JVM supports them.
     */
    public ThreadEfficiencyMonitor() {
        bean = ManagementFactory.getThreadMXBean();

        // get the bean, and start tracking
        if ( bean.isThreadContentionMonitoringSupported() )
            bean.setThreadContentionMonitoringEnabled(true);
        else
            logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency");

        if ( bean.isThreadCpuTimeSupported() )
            bean.setThreadCpuTimeEnabled(true);
        else
            logger.warn("Thread CPU monitoring not supported, we cannot track GATK multi-threaded efficiency");

        // initialize times to 0
        for ( final State state : State.values() )
            times.put(state, 0L);
    }

    private static long nanoToMilli(final long timeInNano) {
        return TimeUnit.NANOSECONDS.toMillis(timeInNano);
    }

    /** Maps the "not available" sentinel (any negative value) reported by the bean to zero */
    private static long nonNegative(final long value) {
        return Math.max(value, 0L);
    }

    /**
     * Get the time spent in state across all threads reported to this monitor
     *
     * @param state to get information about
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getStateTime(final State state) {
        return times.get(state);
    }

    /**
     * Get the total time spent in all states across all threads reported to this monitor
     *
     * @return the time in milliseconds
     */
    @Ensures({"result >= 0"})
    public synchronized long getTotalTime() {
        long total = 0;
        for ( final long time : times.values() )
            total += time;
        return total;
    }

    /**
     * Get the percentage of time spent in state across all threads reported to this monitor
     *
     * @param state to get information about
     * @return the percentage (0.0-100.0) of time spent in state over all state times of all threads
     */
    @Ensures({"result >= 0.0", "result <= 100.0"})
    public synchronized double getStatePercent(final State state) {
        // the denominator is floored at 1 so an empty monitor reports 0.0 rather than NaN
        return (100.0 * getStateTime(state)) / Math.max(getTotalTime(), 1);
    }

    /**
     * How many threads have been reported to this monitor so far?
     *
     * @return the number of calls made to threadIsDone()
     */
    public int getnThreadsAnalyzed() {
        return nThreadsAnalyzed;
    }

    @Override
    public synchronized String toString() {
        final StringBuilder b = new StringBuilder();

        b.append("total ").append(getTotalTime()).append(" ");
        for ( final State state : State.values() ) {
            b.append(state).append(" ").append(getStateTime(state)).append(" ");
        }

        return b.toString();
    }

    /**
     * Print usage information about the threads reported to this monitor to logger
     * with the INFO priority
     *
     * @param logger the logger to write to
     */
    public synchronized void printUsageInformation(final Logger logger) {
        printUsageInformation(logger, Priority.INFO);
    }

    /**
     * Print usage information about the threads reported to this monitor to logger
     *
     * The per-state breakdown is always written at debug level; only the three summary
     * lines (CPU, walker and I/O efficiency) use the provided priority.
     *
     * @param logger   the logger to write to
     * @param priority the priority used for the summary lines
     */
    public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
        logger.debug("Number of threads monitored: " + getnThreadsAnalyzed());
        logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
        for ( final State state : State.values() ) {
            logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state)));
        }
        logger.log(priority, String.format("CPU efficiency : %6.2f%% of time spent %s", getStatePercent(State.USER_CPU), State.USER_CPU.getUserFriendlyName()));
        logger.log(priority, String.format("Walker inefficiency : %6.2f%% of time spent %s", getStatePercent(State.BLOCKING), State.BLOCKING.getUserFriendlyName()));
        logger.log(priority, String.format("I/O inefficiency : %6.2f%% of time spent %s", getStatePercent(State.WAITING_FOR_IO), State.WAITING_FOR_IO.getUserFriendlyName()));
    }

    /**
     * Add the timing information of thread to this monitor
     *
     * The thread is always counted as analyzed.  Its times are only added when the bean
     * still has a ThreadInfo for it.  Metrics the JVM cannot provide are recorded as zero:
     * the bean reports -1 when a measurement is disabled, and throws when it is
     * unsupported, neither of which may be allowed to corrupt the totals.
     *
     * @param thread the thread whose information we are updating
     */
    @Ensures({
            "getTotalTime() >= old(getTotalTime())"
    })
    public synchronized void threadIsDone(final Thread thread) {
        nThreadsAnalyzed++;

        if ( DEBUG ) logger.warn("UpdateThreadInfo called");

        final long threadID = thread.getId();
        final ThreadInfo info = bean.getThreadInfo(threadID);
        if ( info == null )
            return; // the bean no longer knows this thread, so there is nothing to add

        long userTimeNano = 0;
        long systemTimeNano = 0;
        if ( bean.isThreadCpuTimeSupported() ) {
            final long totalTimeNano = bean.getThreadCpuTime(threadID);
            userTimeNano = nonNegative(bean.getThreadUserTime(threadID));
            systemTimeNano = nonNegative(totalTimeNano - userTimeNano);
        }

        long blockedTime = 0;
        long waitedTime = 0;
        if ( bean.isThreadContentionMonitoringSupported() ) {
            blockedTime = nonNegative(info.getBlockedTime());
            waitedTime = nonNegative(info.getWaitedTime());
        }

        final long userTimeInMilliseconds = nanoToMilli(userTimeNano);
        final long systemTimeInMilliseconds = nanoToMilli(systemTimeNano);

        if ( DEBUG ) logger.warn("Updating thread with user runtime " + userTimeInMilliseconds + " and system runtime " + systemTimeInMilliseconds + " of which blocked " + blockedTime + " and waiting " + waitedTime);
        incTimes(State.BLOCKING, blockedTime);
        incTimes(State.WAITING, waitedTime);
        incTimes(State.USER_CPU, userTimeInMilliseconds);
        incTimes(State.WAITING_FOR_IO, systemTimeInMilliseconds);
    }

    /**
     * Helper function that increments the times counter by by for state
     *
     * @param state the state whose counter is incremented
     * @param by    the non-negative number of milliseconds to add
     */
    @Requires({"state != null", "by >= 0"})
    @Ensures("getTotalTime() == old(getTotalTime()) + by")
    private synchronized void incTimes(final State state, final long by) {
        times.put(state, times.get(state) + by);
    }

    /** The categories that a thread's time is divided into */
    public enum State {
        BLOCKING("blocking on synchronized data structures"),
        WAITING("waiting on some other thread"),
        USER_CPU("doing productive CPU work"),
        WAITING_FOR_IO("waiting for I/O");

        private final String userFriendlyName;

        private State(String userFriendlyName) {
            this.userFriendlyName = userFriendlyName;
        }

        public String getUserFriendlyName() {
            return userFriendlyName;
        }
    }
}

View File

@ -47,24 +47,24 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
private class StateTest extends TestDataProvider {
private final double TOLERANCE = 0.1; // willing to tolerate a 10% error
final List<StateMonitoringThreadFactory.State> statesForThreads;
final List<EfficiencyMonitoringThreadFactory.State> statesForThreads;
public StateTest(final List<StateMonitoringThreadFactory.State> statesForThreads) {
public StateTest(final List<EfficiencyMonitoringThreadFactory.State> statesForThreads) {
super(StateTest.class);
this.statesForThreads = statesForThreads;
setName("StateTest " + Utils.join(",", statesForThreads));
}
public List<StateMonitoringThreadFactory.State> getStatesForThreads() {
public List<EfficiencyMonitoringThreadFactory.State> getStatesForThreads() {
return statesForThreads;
}
public int getNStates() { return statesForThreads.size(); }
public double maxStateFraction(final StateMonitoringThreadFactory.State state) { return fraction(state) + TOLERANCE; }
public double minStateFraction(final StateMonitoringThreadFactory.State state) { return fraction(state) - TOLERANCE; }
public double maxStateFraction(final EfficiencyMonitoringThreadFactory.State state) { return fraction(state) + TOLERANCE; }
public double minStateFraction(final EfficiencyMonitoringThreadFactory.State state) { return fraction(state) - TOLERANCE; }
private double fraction(final StateMonitoringThreadFactory.State state) {
private double fraction(final EfficiencyMonitoringThreadFactory.State state) {
return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size());
}
}
@ -74,9 +74,9 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
* requested for input argument
*/
private static class StateTestThread implements Callable<Double> {
private final StateMonitoringThreadFactory.State stateToImplement;
private final EfficiencyMonitoringThreadFactory.State stateToImplement;
private StateTestThread(final StateMonitoringThreadFactory.State stateToImplement) {
private StateTestThread(final EfficiencyMonitoringThreadFactory.State stateToImplement) {
this.stateToImplement = stateToImplement;
}
@ -95,10 +95,10 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
Thread.currentThread().sleep(THREAD_TARGET_DURATION_IN_MILLISECOND);
return 0.0;
case BLOCKING:
if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn("Blocking...");
synchronized (GLOBAL_LOCK) {
// the GLOBAL_LOCK must be held by the unit test itself for this to properly block
if ( StateMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
if ( EfficiencyMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking");
}
return 0.0;
case WAITING_FOR_IO:
@ -114,10 +114,10 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
@DataProvider(name = "StateTest")
public Object[][] createStateTest() {
for ( final int nThreads : Arrays.asList(3) ) {
//final List<StateMonitoringThreadFactory.State> allStates = Arrays.asList(StateMonitoringThreadFactory.State.WAITING_FOR_IO);
final List<StateMonitoringThreadFactory.State> allStates = Arrays.asList(StateMonitoringThreadFactory.State.USER_CPU, StateMonitoringThreadFactory.State.WAITING, StateMonitoringThreadFactory.State.BLOCKING);
//final List<StateMonitoringThreadFactory.State> allStates = Arrays.asList(StateMonitoringThreadFactory.State.values());
for (final List<StateMonitoringThreadFactory.State> states : Utils.makePermutations(allStates, nThreads, true) ) {
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.WAITING_FOR_IO);
final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.USER_CPU, EfficiencyMonitoringThreadFactory.State.WAITING, EfficiencyMonitoringThreadFactory.State.BLOCKING);
//final List<EfficiencyMonitoringThreadFactory.State> allStates = Arrays.asList(EfficiencyMonitoringThreadFactory.State.values());
for (final List<EfficiencyMonitoringThreadFactory.State> states : Utils.makePermutations(allStates, nThreads, true) ) {
//if ( Collections.frequency(states, Thread.State.BLOCKED) > 0)
new StateTest(states);
}
@ -129,13 +129,13 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "StateTest")
public void testStateTest(final StateTest test) throws InterruptedException {
// allows us to test blocking
final StateMonitoringThreadFactory factory = new StateMonitoringThreadFactory(test.getNStates());
final EfficiencyMonitoringThreadFactory factory = new EfficiencyMonitoringThreadFactory(test.getNStates());
final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory);
logger.warn("Running " + test);
synchronized (GLOBAL_LOCK) {
//logger.warn(" Have lock");
for ( final StateMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() )
for ( final EfficiencyMonitoringThreadFactory.State threadToRunState : test.getStatesForThreads() )
threadPool.submit(new StateTestThread(threadToRunState));
// lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads
@ -158,10 +158,10 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest {
Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime);
Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime);
for (final StateMonitoringThreadFactory.State state : StateMonitoringThreadFactory.State.values() ) {
for (final EfficiencyMonitoringThreadFactory.State state : EfficiencyMonitoringThreadFactory.State.values() ) {
final double min = test.minStateFraction(state);
final double max = test.maxStateFraction(state);
final double obs = factory.getStateFraction(state);
final double obs = factory.getStatePercent(state);
// logger.warn(" Checking " + state
// + " min " + String.format("%.2f", min)
// + " max " + String.format("%.2f", max)