Fix requested by Lee Lichtenstein: first check to see whether it's time for

a progress message, then aggregate metrics.  Makes the overhead of
printProgress in RealignerTargetCreator go from >20% to ~3%.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5663 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-04-20 03:22:48 +00:00
parent d35c7d1029
commit 54660a8c25
5 changed files with 53 additions and 58 deletions

View File

@ -176,7 +176,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
outputTracker.bypassThreadLocalStorage(true); outputTracker.bypassThreadLocalStorage(true);
try { try {
walker.onTraversalDone(result); walker.onTraversalDone(result);
printOnTraversalDone(result,engine.getCumulativeMetrics()); printOnTraversalDone(result);
} }
finally { finally {
outputTracker.bypassThreadLocalStorage(false); outputTracker.bypassThreadLocalStorage(false);

View File

@ -84,7 +84,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = accumulator.finishTraversal(); Object result = accumulator.finishTraversal();
printOnTraversalDone(result,engine.getCumulativeMetrics()); printOnTraversalDone(result);
outputTracker.close(); outputTracker.close();
cleanup(); cleanup();

View File

@ -217,8 +217,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* Print summary information for the analysis. * Print summary information for the analysis.
* @param sum The final reduce output. * @param sum The final reduce output.
*/ */
protected void printOnTraversalDone(Object sum, ReadMetrics metrics) { protected void printOnTraversalDone(Object sum) {
traversalEngine.printOnTraversalDone(metrics); traversalEngine.printOnTraversalDone();
} }
/** /**

View File

@ -119,6 +119,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0; private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0; private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
private boolean progressMeterInitialized = false;
// for performance log // for performance log
private static final boolean PERFORMANCE_LOG_ENABLED = true; private static final boolean PERFORMANCE_LOG_ENABLED = true;
@ -229,10 +230,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
*/ */
public void printProgress(Shard shard, GenomeLoc loc) { public void printProgress(Shard shard, GenomeLoc loc) {
// A bypass is inserted here for unit testing. // A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. printProgress(loc,shard.getReadMetrics(),false);
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false);
} }
/** /**
@ -240,25 +238,31 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
* every M seconds, for N and M set in global variables. * every M seconds, for N and M set in global variables.
* *
* @param loc Current location, can be null if you are at the end of the traversal * @param loc Current location, can be null if you are at the end of the traversal
* @param metrics Metrics of reads filtered in/out. * @param metrics Data processed since the last cumulative
* @param mustPrint If true, will print out info, regardless of nRecords or time interval * @param mustPrint If true, will print out info, regardless of nRecords or time interval
*/ */
private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) { private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) {
final long nRecords = metrics.getNumIterations(); if(!progressMeterInitialized && mustPrint == false ) {
if ( nRecords == 1 && mustPrint == false ) {
logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]"); logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]");
logger.info(String.format("%15s processed.%s runtime per.1M.%s completed total.runtime remaining", logger.info(String.format("%15s processed.%s runtime per.1M.%s completed total.runtime remaining",
"Location", getTraversalType(), getTraversalType())); "Location", getTraversalType(), getTraversalType()));
progressMeterInitialized = true;
} }
else {
final long curTime = timer.currentTime(); final long curTime = timer.currentTime();
boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY); boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY);
boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY); boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
if ( printProgress || printLog ) { if ( printProgress || printLog ) {
ProcessingHistory last = updateHistory(loc, metrics); // getting and appending metrics data actually turns out to be quite a heavyweight
// operation. Postpone it until after determining whether to print the log message.
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics() : new ReadMetrics();
if(metrics != null)
cumulativeMetrics.incrementMetrics(metrics);
final long nRecords = cumulativeMetrics.getNumIterations();
ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
final MyTime elapsed = new MyTime(last.elapsedSeconds); final MyTime elapsed = new MyTime(last.elapsedSeconds);
final MyTime bpRate = new MyTime(secondsPerMillionBP(last)); final MyTime bpRate = new MyTime(secondsPerMillionBP(last));
@ -278,15 +282,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
else else
PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
// String common = String.format("%4.1e %s in %s, %s per 1M %s, %4.1f%% complete, est. runtime %s, %s remaining",
// nRecords*1.0, getTraversalType(), elapsed, unitRate,
// getTraversalType(), 100*fractionGenomeTargetCompleted,
// estTotalRuntime, timeToCompletion);
//
// if (loc != null)
// logger.info(String.format("%20s: processing %s", loc, common));
// else
// logger.info(String.format("Processing %s", common));
logger.info(String.format("%15s %5.2e %s %s %4.1f%% %s %s", logger.info(String.format("%15s %5.2e %s %s %4.1f%% %s %s",
loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate, loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate,
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion)); 100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
@ -303,7 +298,6 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
} }
} }
} }
}
/** /**
* Keeps track of the last HISTORY_WINDOW_SIZE data points for the progress meter. Currently the * Keeps track of the last HISTORY_WINDOW_SIZE data points for the progress meter. Currently the
@ -345,11 +339,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/** /**
* Called after a traversal to print out information about the traversal process * Called after a traversal to print out information about the traversal process
*/ */
public void printOnTraversalDone(ReadMetrics cumulativeMetrics) { public void printOnTraversalDone() {
printProgress(null, cumulativeMetrics, true); printProgress(null, null, true);
final double elapsed = timer.getElapsedTime(); final double elapsed = timer.getElapsedTime();
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
// count up the number of skipped reads by summing over all filters // count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L; long nSkippedReads = 0L;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet()) for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet())

View File

@ -138,7 +138,6 @@ public class TraverseReadsUnitTest extends BaseTest {
dataProvider.close(); dataProvider.close();
} }
traversalEngine.printOnTraversalDone(new ReadMetrics());
countReadWalker.onTraversalDone(accumulator); countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) { if (!(accumulator instanceof Integer)) {