From 857b11b26f9f2d5e78ee7565430b502962b564dc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 23 Aug 2012 09:59:37 -0400 Subject: [PATCH] Done with GSA-506: Add nt and efficiency information to GATKRunReport -- GATKRunReports contain itemized information about the numThreads used to execute the GATK, as well as the efficiency of the use of those threads to get real work done, including time spent running, waiting, blocking, and waiting for IO -- See https://jira.broadinstitute.org/browse/GSA-506 for more details --- .../sting/gatk/GenomeAnalysisEngine.java | 24 +++++++++---- .../sting/gatk/executive/MicroScheduler.java | 8 +++++ .../sting/gatk/phonehome/GATKRunReport.java | 34 +++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 0d1c34ced..c8dbb090d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -30,7 +30,6 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; -import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; @@ -52,18 +51,14 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.util.*; /** @@ -175,6 +170,13 @@ public class GenomeAnalysisEngine { */ private Collection referenceMetaDataFiles; + /** + * The threading efficiency monitor we use in the GATK to monitor our efficiency. + * + * May be null if one isn't active, or hasn't be initialized yet + */ + private ThreadEfficiencyMonitor threadEfficiencyMonitor = null; + /** * Set the reference metadata files to use for this traversal. * @param referenceMetaDataFiles Collection of files and descriptors over which to traverse. @@ -252,6 +254,7 @@ public class GenomeAnalysisEngine { // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = createMicroscheduler(); + threadEfficiencyMonitor = microScheduler.getThreadEfficiencyMonitor(); // create temp directories as necessary initializeTempDirectory(); @@ -1003,6 +1006,15 @@ public class GenomeAnalysisEngine { return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics(); } + /** + * Return the global ThreadEfficiencyMonitor, if there is one + * + * @return the monitor, or null if none is active + */ + public ThreadEfficiencyMonitor getThreadEfficiencyMonitor() { + return threadEfficiencyMonitor; + } + // ------------------------------------------------------------------------------------- // // code for working with Samples database diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index b755cdd77..4becc5a78 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -158,6 +158,14 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { } } + /** + * Return the ThreadEfficiencyMonitor we are using to track our resource utilization, if there is one + * + * @return the monitor, or null if none is active + */ + public ThreadEfficiencyMonitor getThreadEfficiencyMonitor() { + return threadEfficiencyMonitor; + } /** * Inform this Microscheduler to use the efficiency monitor used to create threads in subclasses diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 035252c14..6f3f175a2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import org.jets3t.service.S3Service; import org.jets3t.service.S3ServiceException; import org.jets3t.service.impl.rest.httpclient.RestS3Service; @@ -141,6 +142,21 @@ public class GATKRunReport { @Element(required = true, name = "tag") private String tag; + // ----------------------------------------------------------------- + // elements related to multi-threading and efficiency + // ----------------------------------------------------------------- + + @Element(required = true, name = "numThreads") + private int numThreads; + @Element(required = true, name = "percent_time_running") + private String percentTimeRunning; + @Element(required = true, name = "percent_time_waiting") + private String percentTimeWaiting; + @Element(required = true, name = "percent_time_blocking") + private String percentTimeBlocking; + @Element(required = true, name = "percent_time_waiting_for_io") + private String percentTimeWaitingForIO; + public enum PhoneHomeOption { /** Disable phone home */ NO_ET, @@ -201,12 +217,30 @@ public class GATKRunReport { // if there was an exception, capture it this.mException = e == null ? null : new ExceptionToXML(e); + + numThreads = engine.getArguments().numberOfThreads; + percentTimeRunning = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.USER_CPU); + percentTimeBlocking = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.BLOCKING); + percentTimeWaiting = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING); + percentTimeWaitingForIO = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING_FOR_IO); } public String getID() { return id; } + /** + * Return a string representing the percent of time the GATK spent in state, if possible. Otherwise return NA + * + * @param engine the GATK engine whose threading efficiency info we will use + * @param state the state whose occupancy we wish to know + * @return a string representation of the percent occupancy of state, or NA is not possible + */ + private String getThreadEfficiencyPercent(final GenomeAnalysisEngine engine, final ThreadEfficiencyMonitor.State state) { + final ThreadEfficiencyMonitor tem = engine.getThreadEfficiencyMonitor(); + return tem == null ? "NA" : String.format("%.2f", tem.getStatePercent(state)); + } + public void postReport(PhoneHomeOption type) { logger.debug("Posting report of type " + type);