The --performanceLog (-PF) X.dat argument is now enabled. It writes out an R-friendly table of the performance of the GATK over time, as a more detailed version of the INFO progress meter. Also adds an R script for plotting the performance of the GATK over time. This will be helpful for upcoming scalability testing and for debugging memory leaks and other incremental performance problems.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4921 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-01-02 23:34:21 +00:00
parent 586d3f05d9
commit 5539c2d9f3
3 changed files with 52 additions and 7 deletions

View File

@ -0,0 +1,29 @@
# Script setup.
# Command-line usage: the first trailing argument is the performance log to read,
# the second is the text used as the plot title. When no argument is given
# (onCMDLine is FALSE) nothing is read and no PDF device is opened.
args <- commandArgs(trailingOnly = TRUE)
RUNME <- TRUE
DATA_FILE <- args[1]
DESCRIPTION <- args[2]
onCMDLine <- !is.na(DATA_FILE)

# The report is written next to the input, as <DATA_FILE>.pdf
OUTPUT_PDF <- paste(DATA_FILE, ".pdf", sep = "")

if ( onCMDLine ) {
    print(paste("Reading data from", DATA_FILE))
    # The table is expected to carry a header row naming its columns
    d <- read.table(DATA_FILE, header = TRUE)
    # All subsequent plotting goes to the PDF until dev.off() is called
    pdf(OUTPUT_PDF)
}
# Plot processing speed against elapsed time for one performance log.
#
# Args:
#   d: data frame with numeric columns `elapsed.time` and `processing.speed`.
#
# The y-axis is clipped to the 5th-95th percentile of the speeds so that a few
# extreme samples do not flatten the rest of the curve; a dashed horizontal line
# marks the median. The plot title comes from the global DESCRIPTION, and the
# figure is drawn on the current graphics device.
#
# Stops with an error when `d` holds no finite processing.speed value.
generateOneReport <- function(d) {
    # quantile() stops on NA/NaN input and an infinite quantile would make ylim
    # invalid, so summarize the finite speeds only. Non-finite points are still
    # passed to plot(), which simply does not draw them.
    finiteSpeeds = d$processing.speed[is.finite(d$processing.speed)]
    if ( length(finiteSpeeds) == 0 )
        stop("No finite processing.speed values to plot")
    qs = quantile(finiteSpeeds, probs = c(0.05, 0.5, 0.95))
    plot(d$elapsed.time, d$processing.speed, main=DESCRIPTION, xlab="Elapsed time (sec)", ylab="Processing speed (seconds per 1M units)", ylim=c(qs[1], qs[3]), type="b", col="cornflowerblue", lwd=2)
    # Dashed reference line at the median speed
    abline(h=qs[2], lty=2)
}
# Draw the report for the loaded table when report generation is switched on.
if ( RUNME ) generateOneReport(d)

# Command-line runs opened a PDF device; close it so the file is completed.
if ( onCMDLine ) {
    dev.off()
}

View File

@ -150,6 +150,11 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
}
// --------------------------------------------------------------------------------------------------------------
//
// BAQ arguments
//
// --------------------------------------------------------------------------------------------------------------
/** BAQ calculation mode to apply in the engine; bound to the optional "baq" argument and OFF by default. */
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
@ -158,6 +163,15 @@ public class GATKArgumentCollection {
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty. Default value is 1e-4. 1e-3 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
// --------------------------------------------------------------------------------------------------------------
//
// performance log arguments
//
// --------------------------------------------------------------------------------------------------------------
/**
 * File that the GATK runtime performance log is written to; bound to the optional
 * "performanceLog" argument (short name "PF"). Defaults to null, meaning no file was provided.
 */
@Element(required = false)
@Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
public File performanceLog = null;
/**
* Gets the default downsampling method, returned if the user didn't specify any downsampling
* method.
@ -350,6 +364,10 @@ public class GATKArgumentCollection {
if ( BAQMode != other.BAQMode) return false;
if ( BAQGOP != other.BAQGOP ) return false;
if ((other.performanceLog == null && this.performanceLog != null) ||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog))) {
return false;
return true;
}

View File

@ -52,7 +52,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
private final long N_RECORDS_TO_PRINT = 1000000;
// for performance log
private static final boolean PERFORMANCE_LOG_ENABLED = false;
private static final boolean PERFORMANCE_LOG_ENABLED = true;
// Stream the performance log is written to; null until the log file has been opened.
private PrintStream performanceLog = null;
private long lastPerformanceLogPrintTime = -1; // When was the last time we printed to the performance log?
// NOTE(review): this was commented as "in seconds", but 1 * 1000 looks like one second expressed in
// milliseconds -- confirm against the code that compares it with lastPerformanceLogPrintTime.
private final long PERFORMANCE_LOG_PRINT_FREQUENCY = 1 * 1000;
@ -126,17 +126,15 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
//
// code to process the performance log
// TODO -- should be integrated into command line system [hard coded off now]
// TODO -- should write a unique log name as an option?
//
// TODO -- should be controlled by Queue so that .out and .performance.log comes out
//
if ( PERFORMANCE_LOG_ENABLED && performanceLog == null ) {
if ( PERFORMANCE_LOG_ENABLED && performanceLog == null && engine.getArguments().performanceLog != null ) {
try {
// todo -- temp for testing
performanceLog = new PrintStream(new FileOutputStream("performance.log"));
performanceLog = new PrintStream(new FileOutputStream(engine.getArguments().performanceLog));
performanceLog.println(Utils.join("\t", Arrays.asList("elapsed.time", "units.processed", "processing.speed")));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(new File("performance.log"), e);
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().performanceLog, e);
}
}