Fix requested by Lee Lichtenstein: first check to see whether it's time for

a progress message, then aggregate metrics.  Makes the overhead of
printProgress in RealignerTargetCreator go from >20% to ~3%.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5663 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-04-20 03:22:48 +00:00
parent d35c7d1029
commit 54660a8c25
5 changed files with 53 additions and 58 deletions

View File

@ -176,7 +176,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
outputTracker.bypassThreadLocalStorage(true);
try {
walker.onTraversalDone(result);
printOnTraversalDone(result,engine.getCumulativeMetrics());
printOnTraversalDone(result);
}
finally {
outputTracker.bypassThreadLocalStorage(false);

View File

@ -84,7 +84,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = accumulator.finishTraversal();
printOnTraversalDone(result,engine.getCumulativeMetrics());
printOnTraversalDone(result);
outputTracker.close();
cleanup();

View File

@ -217,8 +217,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* Print summary information for the analysis.
* @param sum The final reduce output.
*/
protected void printOnTraversalDone(Object sum, ReadMetrics metrics) {
traversalEngine.printOnTraversalDone(metrics);
protected void printOnTraversalDone(Object sum) {
traversalEngine.printOnTraversalDone();
}
/**

View File

@ -119,6 +119,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
private boolean progressMeterInitialized = false;
// for performance log
private static final boolean PERFORMANCE_LOG_ENABLED = true;
@ -229,10 +230,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
*/
public void printProgress(Shard shard, GenomeLoc loc) {
// A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false);
printProgress(loc,shard.getReadMetrics(),false);
}
/**
@ -240,66 +238,62 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
* every M seconds, for N and M set in global variables.
*
* @param loc Current location, can be null if you are at the end of the traversal
* @param metrics Metrics of reads filtered in/out.
* @param metrics Data processed since the last cumulative
* @param mustPrint If true, will print out info, regardless of nRecords or time interval
*/
private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) {
final long nRecords = metrics.getNumIterations();
if ( nRecords == 1 && mustPrint == false ) {
if(!progressMeterInitialized && mustPrint == false ) {
logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]");
logger.info(String.format("%15s processed.%s runtime per.1M.%s completed total.runtime remaining",
"Location", getTraversalType(), getTraversalType()));
progressMeterInitialized = true;
}
else {
final long curTime = timer.currentTime();
boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY);
boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
if ( printProgress || printLog ) {
ProcessingHistory last = updateHistory(loc, metrics);
final long curTime = timer.currentTime();
boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY);
boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
final MyTime elapsed = new MyTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
if ( printProgress || printLog ) {
// getting and appending metrics data actually turns out to be quite a heavyweight
// operation. Postpone it until after determining whether to print the log message.
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics() : new ReadMetrics();
if(metrics != null)
cumulativeMetrics.incrementMetrics(metrics);
if ( printProgress ) {
lastProgressPrintTime = curTime;
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
final long nRecords = cumulativeMetrics.getNumIterations();
// String common = String.format("%4.1e %s in %s, %s per 1M %s, %4.1f%% complete, est. runtime %s, %s remaining",
// nRecords*1.0, getTraversalType(), elapsed, unitRate,
// getTraversalType(), 100*fractionGenomeTargetCompleted,
// estTotalRuntime, timeToCompletion);
//
// if (loc != null)
// logger.info(String.format("%20s: processing %s", loc, common));
// else
// logger.info(String.format("Processing %s", common));
logger.info(String.format("%15s %5.2e %s %s %4.1f%% %s %s",
loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate,
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
}
final MyTime elapsed = new MyTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
final MyTime unitRate = new MyTime(secondsPerMillionElements(last));
final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted);
final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t);
if ( printLog ) {
lastPerformanceLogPrintTime = curTime;
synchronized(performanceLogLock) {
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
}
if ( printProgress ) {
lastProgressPrintTime = curTime;
// dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS )
PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
logger.info(String.format("%15s %5.2e %s %s %4.1f%% %s %s",
loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate,
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
}
if ( printLog ) {
lastPerformanceLogPrintTime = curTime;
synchronized(performanceLogLock) {
performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n",
elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t,
fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t);
}
}
}
@ -345,11 +339,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/**
* Called after a traversal to print out information about the traversal process
*/
public void printOnTraversalDone(ReadMetrics cumulativeMetrics) {
printProgress(null, cumulativeMetrics, true);
public void printOnTraversalDone() {
printProgress(null, null, true);
final double elapsed = timer.getElapsedTime();
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
// count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet())

View File

@ -138,7 +138,6 @@ public class TraverseReadsUnitTest extends BaseTest {
dataProvider.close();
}
traversalEngine.printOnTraversalDone(new ReadMetrics());
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {