diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java new file mode 100644 index 000000000..868ea89b5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.Advanced; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; +import org.broadinstitute.sting.utils.PathUtils; +import org.broadinstitute.sting.utils.Utils; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +/** + * Generic service for executing RScripts in the GATK directory + * + * @author Your Name + * @since Date created + */ +public class RScriptExecutor { + /** + * our log + */ + protected static Logger logger = Logger.getLogger(RScriptExecutor.class); + + public static class RScriptArgumentCollection { + @Advanced + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + private String PATH_TO_RSCRIPT = "Rscript"; + + @Advanced + @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) + private List PATH_TO_RESOURCES = Arrays.asList("public/R/", "private/R/"); + } + + final RScriptArgumentCollection myArgs; + final boolean exceptOnError; + + public RScriptExecutor(final RScriptArgumentCollection myArgs, final boolean exceptOnError) { + this.myArgs = myArgs; + this.exceptOnError = exceptOnError; + } + + public void callRScripts(String scriptName, String... scriptArgs) { + callRScripts(scriptName, Arrays.asList(scriptArgs)); + } + + public void callRScripts(String scriptName, List scriptArgs) { + try { + final File pathToScript = findScript(scriptName); + if ( pathToScript == null ) return; // we failed but shouldn't exception out + final String argString = Utils.join(" ", scriptArgs); + final String cmdLine = Utils.join(" ", Arrays.asList(myArgs.PATH_TO_RSCRIPT, pathToScript, argString)); + logger.info("Executing RScript: " + cmdLine); + Runtime.getRuntime().exec(cmdLine).waitFor(); + } catch (InterruptedException e) { + generateException(e); + } catch (IOException e) { + generateException("Fatal Exception: Perhaps RScript jobs are being spawned too quickly?", e); + } + } + + public File findScript(final String scriptName) { + for ( String pathToResource : myArgs.PATH_TO_RESOURCES ) { + final File f = new File(pathToResource + "/" + scriptName); + if ( f.exists() ) { + if ( f.canRead() ) + return f; + else + generateException("Script exists but couldn't be read: " + scriptName); + } + } + + generateException("Couldn't find script: " + scriptName + " in " + myArgs.PATH_TO_RSCRIPT); + return null; + } + + private void generateException(String msg) { + generateException(msg, null); + } + + private void generateException(Throwable e) { + generateException("", e); + } + + private void generateException(String msg, Throwable e) { + if ( exceptOnError ) + throw new RuntimeException(msg, e); + else + logger.warn(msg + (e == null ? "" : ":" + e.getMessage())); + } +} diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index d877575df..138003cdd 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -118,7 +118,8 @@ class QCommandLine extends CommandLineProgram with Logging { if ( ! settings.disableJobReport ) { logger.info("Writing JobLogging GATKReport to file " + settings.jobReportFile) QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), settings.jobReportFile) - // todo -- execute Rscript here once generic RScript execution system is implemented } + QJobReport.plotReport(settings.rScriptArgs, settings.jobReportFile) + } } if (!qGraph.success) { diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala index dbc3e3886..46063fc24 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraphSettings.scala @@ -28,6 +28,7 @@ import java.io.File import org.broadinstitute.sting.queue.QSettings import org.broadinstitute.sting.queue.util.SystemUtils import org.broadinstitute.sting.commandline.{Advanced, ArgumentCollection, Argument} +import org.broadinstitute.sting.utils.R.RScriptExecutor /** * Command line options for a QGraph. @@ -76,6 +77,9 @@ class QGraphSettings { @Argument(fullName="disableJobReport", shortName="disabpleJobReport", doc="If provided, we will not create a job report", required=false) var disableJobReport: Boolean = false + @ArgumentCollection + var rScriptArgs = new RScriptExecutor.RScriptArgumentCollection + @ArgumentCollection val qSettings = new QSettings } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index 3e393e4c0..c84f0b17a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -29,22 +29,28 @@ import org.broadinstitute.sting.utils.exceptions.UserException import org.broadinstitute.sting.queue.engine.JobRunInfo import java.io.{FileOutputStream, PrintStream, File} import org.broadinstitute.sting.queue.function.scattergather.{GathererFunction, ScatterFunction} +import org.broadinstitute.sting.utils.R.RScriptExecutor.RScriptArgumentCollection +import org.broadinstitute.sting.utils.R.RScriptExecutor +import org.broadinstitute.sting.queue.QScript /** * A mixin to add Job info to the class */ -// todo -- need to enforce QFunction to have copySettingTo work trait QJobReport extends Logging { self: QFunction => - // todo -- might make more sense to mix in the variables protected var reportGroup: String = null protected var reportFeatures: Map[String, String] = Map() + protected var reportEnabled: Boolean = true + + def includeInReport = reportEnabled + def enableReport() { reportEnabled = true } + def disableReport() { reportEnabled = false } - def includeInReport = getReportGroup != null def setRunInfo(info: JobRunInfo) { logger.info("info " + info) reportFeatures = reportFeatures ++ Map( + "iteration" -> 1, "analysisName" -> self.analysisName, "jobName" -> QJobReport.workAroundSameJobNames(this), "intermediate" -> self.isIntermediate, @@ -53,9 +59,15 @@ trait QJobReport extends Logging { "formattedStartTime" -> info.getFormattedStartTime, "formattedDoneTime" -> info.getFormattedDoneTime, "runtime" -> info.getRuntimeInMs).mapValues((x:Any) => if (x != null) x.toString else "null") + +// // handle the special case of iterations +// reportFeatures.get("iteration") match { +// case None => reportFeatures("iteration") = 1 +// case _ => ; +// } } - def getReportGroup = reportGroup + def getReportGroup = analysisName def getReportFeatures = reportFeatures def getReportFeatureNames: List[String] = getReportFeatures.keys.toList @@ -68,24 +80,20 @@ trait QJobReport extends Logging { def getReportName: String = getReportFeature("jobName") - def configureJobReport(group: String) { - this.reportGroup = group - } - - def configureJobReport(group: String, features: Map[String, Any]) { - this.reportGroup = group + def configureJobReport(features: Map[String, Any]) { this.reportFeatures = features.mapValues(_.toString) } // copy the QJobReport information -- todo : what's the best way to do this? override def copySettingsTo(function: QFunction) { self.copySettingsTo(function) - function.reportGroup = this.reportGroup function.reportFeatures = this.reportFeatures } } object QJobReport { + val JOB_REPORT_QUEUE_SCRIPT = "queueJobReport.R" + // todo -- fixme to have a unique name for Scatter/gather jobs as well var seenCounter = 1 var seenNames = Set[String]() @@ -98,6 +106,12 @@ object QJobReport { stream.close() } + def plotReport(args: RScriptArgumentCollection, jobReportFile: File) { + val executor = new RScriptExecutor(args, false) // don't except on error + val pdf = jobReportFile.getAbsolutePath + ".pdf" + executor.callRScripts(JOB_REPORT_QUEUE_SCRIPT, jobReportFile.getAbsolutePath, pdf) + } + def workAroundSameJobNames(func: QFunction):String = { if ( seenNames.apply(func.jobName) ) { seenCounter += 1