Wrapping ScatterGatherableFunctions with a facade instead of using the slower clone library. This will require keeping Clone's facade code in sync with CommandLineFunction, but it runs *much* faster.

Shell invoking scripts so that even really long shell scripts make it through LSF.
Using a truncated copy (up to 1000 characters) of the command line as the job name, for use with bjobs.
Switched the default from re-running everything to re-running only files that need to be regenerated.  --skip_up_to_date replaced with --start_clean for those who want to regenerate everything.
Updated logging to let users know when the scatter gather generator is running, which still takes a while but is orders of magnitude faster for large lists of functions.  (40s for a 100 function graph exploding to a 2500 function graph)


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4448 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-10-07 01:19:18 +00:00
parent 04b4adafda
commit db47230dd9
17 changed files with 221 additions and 115 deletions

View File

@ -549,7 +549,7 @@
</target> </target>
<!-- test tribble using the unit tests set in tribble --> <!-- test tribble using the unit tests set in tribble -->
<target name="tribble.test" description="runs the tribble tests" depends="tribble.compile"> <target name="tribble.test" description="runs the tribble tests" depends="tribble.compile" unless="single">
<echo message="Testing the Tribble Library..."/> <echo message="Testing the Tribble Library..."/>
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/> <ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
</target> </target>

View File

@ -40,9 +40,6 @@
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.0" conf="scala->default"/> <dependency org="org.scala-lang" name="scala-compiler" rev="2.8.0" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.0" conf="scala->default"/> <dependency org="org.scala-lang" name="scala-library" rev="2.8.0" conf="scala->default"/>
<!-- Queue additional dependencies -->
<dependency org="uk.com.robust-it" name="cloning" rev="1.7.1" conf="queue->default" />
<!-- findbug dependencies --> <!-- findbug dependencies -->
<dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/> <dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/>
<dependency org="net.sourceforge.findbugs" name="findbugs-ant" rev="1.3.2" conf="findbugs->default"/> <dependency org="net.sourceforge.findbugs" name="findbugs-ant" rev="1.3.2" conf="findbugs->default"/>

View File

@ -26,8 +26,8 @@ class QCommandLine extends CommandLineProgram with Logging {
@Argument(fullName="expanded_dot_graph", shortName="expandedDot", doc="Outputs the queue graph of scatter gather to a .dot file. Otherwise overwrites the dot_graph", required=false) @Argument(fullName="expanded_dot_graph", shortName="expandedDot", doc="Outputs the queue graph of scatter gather to a .dot file. Otherwise overwrites the dot_graph", required=false)
private var expandedDotFile: File = _ private var expandedDotFile: File = _
@Argument(fullName="skip_up_to_date", shortName="skipUpToDate", doc="Does not run command line functions that don't depend on other jobs if the outputs exist and are older than the inputs.", required=false) @Argument(fullName="start_clean", shortName="clean", doc="Runs all command line functions even if the outputs were previously output successfully.", required=false)
private var skipUpToDate = false private var startClean = false
@Argument(fullName="for_reals", shortName="forReals", doc="Run QScripts", required=false) @Hidden @Argument(fullName="for_reals", shortName="forReals", doc="Run QScripts", required=false) @Hidden
private var runScripts = false private var runScripts = false
@ -47,7 +47,7 @@ class QCommandLine extends CommandLineProgram with Logging {
val qGraph = new QGraph val qGraph = new QGraph
qGraph.dryRun = !(run || runScripts) qGraph.dryRun = !(run || runScripts)
qGraph.bsubAllJobs = bsubAllJobs qGraph.bsubAllJobs = bsubAllJobs
qGraph.skipUpToDateJobs = skipUpToDate qGraph.startClean = startClean
qGraph.dotFile = dotFile qGraph.dotFile = dotFile
qGraph.expandedDotFile = expandedDotFile qGraph.expandedDotFile = expandedDotFile
qGraph.qSettings = qSettings qGraph.qSettings = qSettings
@ -71,10 +71,8 @@ class QCommandLine extends CommandLineProgram with Logging {
}) })
if ( ! getStatus ) { if ( ! getStatus ) {
logger.info("Running generated graph")
qGraph.run qGraph.run
} else { } else {
logger.info("Checking pipeline status")
qGraph.checkStatus qGraph.checkStatus
} }

View File

@ -16,19 +16,19 @@ class InProcessRunner(function: InProcessFunction) extends JobRunner with Loggin
logger.info("Starting: " + function.description) logger.info("Starting: " + function.description)
} }
function.doneOutputs.foreach(_.delete) function.doneOutputs.foreach(_.delete())
function.failOutputs.foreach(_.delete) function.failOutputs.foreach(_.delete())
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
try { try {
function.run() function.run()
function.doneOutputs.foreach(_.createNewFile) function.doneOutputs.foreach(_.createNewFile())
runStatus = RunnerStatus.DONE runStatus = RunnerStatus.DONE
logger.info("Done: " + function.description) logger.info("Done: " + function.description)
} catch { } catch {
case e => { case e => {
runStatus = RunnerStatus.FAILED runStatus = RunnerStatus.FAILED
try { try {
function.failOutputs.foreach(_.createNewFile) function.failOutputs.foreach(_.createNewFile())
} catch { } catch {
case _ => /* ignore errors in the exception handler */ case _ => /* ignore errors in the exception handler */
} }

View File

@ -21,6 +21,9 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
/** A temporary job done file to let Queue know that the process exited with an error. */ /** A temporary job done file to let Queue know that the process exited with an error. */
private lazy val jobFailFile = new File(jobStatusPath + ".fail") private lazy val jobFailFile = new File(jobStatusPath + ".fail")
/** A generated exec shell script. */
private var exec: File = _
/** A generated pre-exec shell script. */ /** A generated pre-exec shell script. */
private var preExec: File = _ private var preExec: File = _
@ -38,7 +41,6 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
job.errorFile = function.jobErrorFile job.errorFile = function.jobErrorFile
job.project = function.jobProject job.project = function.jobProject
job.queue = function.jobQueue job.queue = function.jobQueue
job.command = function.commandLine
if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory)) if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory))
job.workingDir = function.commandDirectory job.workingDir = function.commandDirectory
@ -49,10 +51,16 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
if (function.memoryLimit.isDefined) if (function.memoryLimit.isDefined)
job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]") job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
preExec = writePreExec(function) job.name = function.commandLine.take(1000)
// TODO: Look into passing in a single chained script as recommended by Doug instead of pre, exec, and post.
exec = writeExec()
job.command = "sh " + exec
preExec = writePreExec()
job.preExecCommand = "sh " + preExec job.preExecCommand = "sh " + preExec
postExec = writePostExec(function) postExec = writePostExec()
job.postExecCommand = "sh " + postExec job.postExecCommand = "sh " + postExec
if (logger.isDebugEnabled) { if (logger.isDebugEnabled) {
@ -61,6 +69,10 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
logger.info("Starting: " + job.bsubCommand.mkString(" ")) logger.info("Starting: " + job.bsubCommand.mkString(" "))
} }
function.jobOutputFile.delete()
if (function.jobErrorFile != null)
function.jobErrorFile.delete()
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
job.run() job.run()
jobStatusPath = IOUtils.absolute(new File(function.commandDirectory, "." + job.bsubJobId)).toString jobStatusPath = IOUtils.absolute(new File(function.commandDirectory, "." + job.bsubJobId)).toString
@ -98,7 +110,8 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
/** /**
* Removes all temporary files used for this LSF job. * Removes all temporary files used for this LSF job.
*/ */
private def removeTemporaryFiles() = { def removeTemporaryFiles() = {
exec.delete()
preExec.delete() preExec.delete()
postExec.delete() postExec.delete()
jobDoneFile.delete() jobDoneFile.delete()
@ -115,12 +128,21 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
logger.error("Last %d lines of %s:%n%s".format(tailLines.size, errorFile, tailLines.mkString(nl))) logger.error("Last %d lines of %s:%n%s".format(tailLines.size, errorFile, tailLines.mkString(nl)))
} }
/**
* Writes the function's command line to a temporary exec script file
* in the function's command directory.
* NOTE(review): the ".exec" and "" arguments are presumably the temp file
* prefix and suffix -- confirm against IOUtils.writeTempFile.
* @return the generated exec script file.
*/
private def writeExec() = {
IOUtils.writeTempFile(function.commandLine, ".exec", "", function.commandDirectory)
}
/** /**
* Writes a pre-exec file to cleanup any status files and * Writes a pre-exec file to cleanup any status files and
* optionally mount any automount directories on the node. * optionally mount any automount directories on the node.
* @return the file path to the pre-exec. * @return the file path to the pre-exec.
*/ */
private def writePreExec(function: CommandLineFunction): File = { private def writePreExec() = {
val preExec = new StringBuilder val preExec = new StringBuilder
preExec.append("rm -f '%s/'.$LSB_JOBID.done%n".format(function.commandDirectory)) preExec.append("rm -f '%s/'.$LSB_JOBID.done%n".format(function.commandDirectory))
@ -138,7 +160,7 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
* Writes a post-exec file to create the status files. * Writes a post-exec file to create the status files.
* @return the file path to the post-exec. * @return the file path to the post-exec.
*/ */
private def writePostExec(function: CommandLineFunction): File = { private def writePostExec() = {
val postExec = new StringBuilder val postExec = new StringBuilder
val touchDone = function.doneOutputs.map("touch '%s'%n".format(_)).mkString val touchDone = function.doneOutputs.map("touch '%s'%n".format(_)).mkString

View File

@ -10,9 +10,9 @@ import org.jgrapht.ext.DOTExporter
import java.io.File import java.io.File
import org.jgrapht.event.{TraversalListenerAdapter, EdgeTraversalEvent} import org.jgrapht.event.{TraversalListenerAdapter, EdgeTraversalEvent}
import org.broadinstitute.sting.queue.{QSettings, QException} import org.broadinstitute.sting.queue.{QSettings, QException}
import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, ScatterGatherableFunction}
import org.broadinstitute.sting.queue.function.{InProcessFunction, CommandLineFunction, QFunction} import org.broadinstitute.sting.queue.function.{InProcessFunction, CommandLineFunction, QFunction}
import org.broadinstitute.sting.queue.util.{JobExitException, LsfKillJob, Logging} import org.broadinstitute.sting.queue.util.{JobExitException, LsfKillJob, Logging}
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, GatherFunction, ScatterGatherableFunction}
/** /**
* The internal dependency tracker between sets of function input and output files. * The internal dependency tracker between sets of function input and output files.
@ -20,7 +20,7 @@ import org.broadinstitute.sting.queue.util.{JobExitException, LsfKillJob, Loggin
class QGraph extends Logging { class QGraph extends Logging {
var dryRun = true var dryRun = true
var bsubAllJobs = false var bsubAllJobs = false
var skipUpToDateJobs = false var startClean = false
var dotFile: File = _ var dotFile: File = _
var expandedDotFile: File = _ var expandedDotFile: File = _
var qSettings: QSettings = _ var qSettings: QSettings = _
@ -42,15 +42,6 @@ class QGraph extends Logging {
} }
} }
private def scatterGatherable(edge: FunctionEdge) = {
edge.function match {
case scatterGather: ScatterGatherableFunction if (scatterGather.scatterGatherable) => true
case _ => false
}
}
/** /**
* Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph. * Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph.
*/ */
@ -59,6 +50,7 @@ class QGraph extends Logging {
val isReady = numMissingValues == 0 val isReady = numMissingValues == 0
if (isReady || this.dryRun) { if (isReady || this.dryRun) {
logger.info("Running jobs.")
runJobs() runJobs()
} }
@ -73,31 +65,34 @@ class QGraph extends Logging {
} }
private def fillGraph = { private def fillGraph = {
logger.info("Generating graph.")
fill fill
if (dotFile != null) if (dotFile != null)
renderToDot(dotFile) renderToDot(dotFile)
var numMissingValues = validate var numMissingValues = validate
if (numMissingValues == 0 && bsubAllJobs) { if (numMissingValues == 0 && bsubAllJobs) {
logger.debug("Scatter gathering jobs.") logger.info("Generating scatter gather jobs.")
var scatterGathers = List.empty[FunctionEdge] val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
loop({
case edge: FunctionEdge if (scatterGatherable(edge)) =>
scatterGathers :+= edge
})
var addedFunctions = List.empty[QFunction] var addedFunctions = List.empty[QFunction]
for (scatterGather <- scatterGathers) { for (scatterGather <- scatterGathers) {
val functions = scatterGather.function.asInstanceOf[ScatterGatherableFunction].generateFunctions() val functions = scatterGather.asInstanceOf[FunctionEdge]
.function.asInstanceOf[ScatterGatherableFunction]
.generateFunctions()
if (this.debugMode) if (this.debugMode)
logger.debug("Scattered into %d parts: %n%s".format(functions.size, functions.mkString("%n".format()))) logger.debug("Scattered into %d parts: %n%s".format(functions.size, functions.mkString("%n".format())))
addedFunctions ++= functions addedFunctions ++= functions
} }
logger.info("Removing original jobs.")
this.jobGraph.removeAllEdges(scatterGathers) this.jobGraph.removeAllEdges(scatterGathers)
prune prune
logger.info("Adding scatter gather jobs.")
addedFunctions.foreach(this.add(_)) addedFunctions.foreach(this.add(_))
logger.info("Regenerating graph.")
fill fill
val scatterGatherDotFile = if (expandedDotFile != null) expandedDotFile else dotFile val scatterGatherDotFile = if (expandedDotFile != null) expandedDotFile else dotFile
if (scatterGatherDotFile != null) if (scatterGatherDotFile != null)
@ -108,9 +103,22 @@ class QGraph extends Logging {
numMissingValues numMissingValues
} }
/**
 * Tests whether an edge wraps a function that should be scatter gathered.
 * @param edge The graph edge to inspect.
 * @return true only for a FunctionEdge whose function is a ScatterGatherableFunction
 *         reporting itself as scatterGatherable; false for every other edge.
 */
private def scatterGatherable(edge: QEdge) = edge match {
  case candidate: FunctionEdge =>
    candidate.function match {
      // The function's own flag decides; it replaces the original true/false guard.
      case original: ScatterGatherableFunction => original.scatterGatherable
      case _ => false
    }
  case _ => false
}
def checkStatus = { def checkStatus = {
// build up the full DAG with scatter-gather jobs // build up the full DAG with scatter-gather jobs
fillGraph fillGraph
logger.info("Checking pipeline status.")
logStatus logStatus
} }
@ -161,25 +169,18 @@ class QGraph extends Logging {
} }
private def getReadyJobs = { private def getReadyJobs = {
var readyJobs = List.empty[FunctionEdge] jobGraph.edgeSet.filter{
loop({ case f: FunctionEdge =>
case f: FunctionEdge => { this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) && f.status == RunnerStatus.PENDING
if (this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) && f.status == RunnerStatus.PENDING) case _ => false
readyJobs :+= f }.map(_.asInstanceOf[FunctionEdge])
}
})
readyJobs
} }
private def getRunningJobs = { private def getRunningJobs = {
var runningJobs = List.empty[FunctionEdge] jobGraph.edgeSet.filter{
loop({ case f: FunctionEdge => f.status == RunnerStatus.RUNNING
case f: FunctionEdge => { case _ => false
if (f.status == RunnerStatus.RUNNING) }.map(_.asInstanceOf[FunctionEdge])
runningJobs :+= f
}
})
runningJobs
} }
/** /**
@ -232,13 +233,13 @@ class QGraph extends Logging {
* Runs the jobs by traversing the graph. * Runs the jobs by traversing the graph.
*/ */
private def runJobs() = { private def runJobs() = {
loop({ case f: FunctionEdge => { foreachFunction(f => {
val isDone = this.skipUpToDateJobs && val isDone = !this.startClean &&
f.status == RunnerStatus.DONE && f.status == RunnerStatus.DONE &&
this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) this.previousFunctions(f).forall(_.status == RunnerStatus.DONE)
if (!isDone) if (!isDone)
f.resetPending() f.resetPending()
}}) })
var readyJobs = getReadyJobs var readyJobs = getReadyJobs
var runningJobs = Set.empty[FunctionEdge] var runningJobs = Set.empty[FunctionEdge]
@ -305,8 +306,8 @@ class QGraph extends Logging {
*/ */
private def logStatus = { private def logStatus = {
var statuses = Map.empty[String, AnalysisStatus] var statuses = Map.empty[String, AnalysisStatus]
loop({ foreachFunction(edgeCLF => {
case edgeCLF: FunctionEdge if (edgeCLF.function.analysisName != null) => if (edgeCLF.function.analysisName != null) {
updateStatus(statuses.get(edgeCLF.function.analysisName) match { updateStatus(statuses.get(edgeCLF.function.analysisName) match {
case Some(status) => status case Some(status) => status
case None => case None =>
@ -314,6 +315,7 @@ class QGraph extends Logging {
statuses += edgeCLF.function.analysisName -> status statuses += edgeCLF.function.analysisName -> status
status status
}, edgeCLF) }, edgeCLF)
}
}) })
statuses.values.toList.sortBy(_.analysisName).foreach(status => { statuses.values.toList.sortBy(_.analysisName).foreach(status => {
@ -343,7 +345,7 @@ class QGraph extends Logging {
private def updateStatus(stats: AnalysisStatus, edge: FunctionEdge) = { private def updateStatus(stats: AnalysisStatus, edge: FunctionEdge) = {
if (edge.function.isInstanceOf[GatherFunction]) { if (edge.function.isInstanceOf[GatherFunction]) {
updateSGStatus(stats.gather, edge) updateSGStatus(stats.gather, edge)
} else if (edge.function.isInstanceOf[ScatterGatherableFunction]) { } else if (edge.function.isInstanceOf[CloneFunction]) {
updateSGStatus(stats.scatter, edge) updateSGStatus(stats.scatter, edge)
} else { } else {
stats.status = edge.status stats.status = edge.status
@ -456,19 +458,14 @@ class QGraph extends Logging {
(jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0 (jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0
/** /**
* Utility function for looping over the internal graph and running functions. * Utility function for running a method over all function edges.
* @param edgeFunction Optional function to run for each edge visited. * @param edgeFunction Function to run for each FunctionEdge.
* @param nodeFunction Optional function to run for each node visited.
*/ */
private def loop(edgeFunction: PartialFunction[QEdge, Unit] = null, nodeFunction: PartialFunction[QNode, Unit] = null) = { private def foreachFunction(f: (FunctionEdge) => Unit) = {
val iterator = new TopologicalOrderIterator(this.jobGraph) jobGraph.edgeSet.foreach{
iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QEdge] { case functionEdge: FunctionEdge => f(functionEdge)
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QEdge]) = event.getEdge match { case _ =>
case cmd: FunctionEdge => if (edgeFunction != null && edgeFunction.isDefinedAt(cmd)) edgeFunction(cmd)
case map: MappingEdge => /* do nothing for mapping functions */
} }
})
iterator.foreach(node => if (nodeFunction != null && nodeFunction.isDefinedAt(node)) nodeFunction(node))
} }
/** /**
@ -505,11 +502,11 @@ class QGraph extends Logging {
* Kills any forked jobs still running. * Kills any forked jobs still running.
*/ */
def shutdown() { def shutdown() {
val lsfJobs = getRunningJobs.filter(_.runner.isInstanceOf[LsfJobRunner]).map(_.runner.asInstanceOf[LsfJobRunner].job) val lsfJobRunners = getRunningJobs.filter(_.runner.isInstanceOf[LsfJobRunner]).map(_.runner.asInstanceOf[LsfJobRunner])
if (lsfJobs.size > 0) { if (lsfJobRunners.size > 0) {
for (jobs <- lsfJobs.grouped(10)) { for (jobRunners <- lsfJobRunners.filterNot(_.job.bsubJobId == null).grouped(10)) {
try { try {
val bkill = new LsfKillJob(jobs) val bkill = new LsfKillJob(jobRunners.map(_.job))
logger.info(bkill.command) logger.info(bkill.command)
bkill.run() bkill.run()
} catch { } catch {
@ -518,6 +515,11 @@ class QGraph extends Logging {
case e => case e =>
logger.error("Unable to kill jobs.", e) logger.error("Unable to kill jobs.", e)
} }
try {
jobRunners.foreach(_.removeTemporaryFiles())
} catch {
case e => /* ignore */
}
} }
} }
} }

View File

@ -34,19 +34,22 @@ class ShellJobRunner(function: CommandLineFunction) extends JobRunner with Loggi
logger.info("Errors also written to " + function.jobOutputFile) logger.info("Errors also written to " + function.jobOutputFile)
} }
function.doneOutputs.foreach(_.delete) function.jobOutputFile.delete()
function.failOutputs.foreach(_.delete) if (function.jobErrorFile != null)
function.jobErrorFile.delete()
function.doneOutputs.foreach(_.delete())
function.failOutputs.foreach(_.delete())
runStatus = RunnerStatus.RUNNING runStatus = RunnerStatus.RUNNING
try { try {
job.run() job.run()
function.doneOutputs.foreach(_.createNewFile) function.doneOutputs.foreach(_.createNewFile())
runStatus = RunnerStatus.DONE runStatus = RunnerStatus.DONE
logger.info("Done: " + function.commandLine) logger.info("Done: " + function.commandLine)
} catch { } catch {
case e: JobExitException => case e: JobExitException =>
runStatus = RunnerStatus.FAILED runStatus = RunnerStatus.FAILED
try { try {
function.failOutputs.foreach(_.createNewFile) function.failOutputs.foreach(_.createNewFile())
} catch { } catch {
case _ => /* ignore errors in the exception handler */ case _ => /* ignore errors in the exception handler */
} }

View File

@ -57,7 +57,7 @@ trait CommandLineFunction extends QFunction with Logging {
dirs dirs
} }
override protected def useStatusOutput(file: File) = override def useStatusOutput(file: File) =
file != jobOutputFile && file != jobErrorFile file != jobOutputFile && file != jobErrorFile
override def description = commandLine override def description = commandLine

View File

@ -7,6 +7,6 @@ import java.io.File
*/ */
trait InProcessFunction extends QFunction { trait InProcessFunction extends QFunction {
def run() def run()
protected def useStatusOutput(file: File) = true def useStatusOutput(file: File) = true
def description = this.getClass.getSimpleName def description = this.getClass.getSimpleName
} }

View File

@ -34,7 +34,11 @@ trait QFunction {
*/ */
def dotString = "" def dotString = ""
protected def useStatusOutput(file: File): Boolean /**
* Returns true if the file should be used for status output.
* @return true if the file should be used for status output.
*/
def useStatusOutput(file: File): Boolean
/** /**
* Returns the output files for this function. * Returns the output files for this function.
@ -57,7 +61,7 @@ trait QFunction {
def failOutputs = statusPaths.map(path => new File(path + ".fail")) def failOutputs = statusPaths.map(path => new File(path + ".fail"))
/** The complete list of fields on this CommandLineFunction. */ /** The complete list of fields on this CommandLineFunction. */
lazy val functionFields: List[ArgumentSource] = ParsingEngine.extractArgumentSources(this.getClass).toList lazy val functionFields: List[ArgumentSource] = initFunctionFields
/** The @Input fields on this CommandLineFunction. */ /** The @Input fields on this CommandLineFunction. */
lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input]))
/** The @Output fields on this CommandLineFunction. */ /** The @Output fields on this CommandLineFunction. */
@ -65,6 +69,11 @@ trait QFunction {
/** The @Argument fields on this CommandLineFunction. */ /** The @Argument fields on this CommandLineFunction. */
lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument]))
/**
* Called at most once, returns the list of fields for this function.
*/
protected def initFunctionFields = ParsingEngine.extractArgumentSources(this.getClass).toList
/** /**
* Returns the input files for this function. * Returns the input files for this function.
* @return Set[File] inputs for this function. * @return Set[File] inputs for this function.

View File

@ -0,0 +1,84 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline.ArgumentSource
import java.io.File
/**
 * A facade over a ScatterGatherableFunction.
 * Field values read from or written to the clone are kept in a local map and are
 * only applied to the original function temporarily, while its command line is
 * being generated.
 */
class CloneFunction extends CommandLineFunction {
  /** The function this clone delegates to. */
  var originalFunction: ScatterGatherableFunction = _

  /** NOTE(review): presumably the one based index of the scatter piece -- confirm against the code that sets it. */
  var index: Int = _

  /** Field values held by this clone in place of the original function's values. */
  private var overriddenFields = Map.empty[ArgumentSource, Any]

  /**
   * Runs f while the original function temporarily carries this clone's field values.
   * The original's previous values are put back afterwards, even when f throws.
   * @param f The computation to run against the temporarily modified original function.
   * @return The result of f.
   */
  private def withScatterPart[A](f: () => A): A = {
    // Remember each field's current value on the original, then apply the clone's value.
    val savedValues = overriddenFields.foldLeft(Map.empty[ArgumentSource, Any]) {
      case (saved, (field, overrideValue)) =>
        val previous = originalFunction.getFieldValue(field)
        originalFunction.setFieldValue(field, overrideValue)
        saved + (field -> previous)
    }
    try {
      f()
    } finally {
      for ((field, previous) <- savedValues)
        originalFunction.setFieldValue(field, previous)
    }
  }

  override def dotString = originalFunction.dotString

  override def description = originalFunction.description

  /** Reuses the original function's field list instead of extracting the fields again for this class. */
  override protected def initFunctionFields = originalFunction.functionFields

  /**
   * A file is used for status output unless it is this clone's job output or
   * job error file, or the original function rejects it.
   */
  override def useStatusOutput(file: File) =
    !(file == jobOutputFile || file == jobErrorFile) && originalFunction.useStatusOutput(file)

  /**
   * Fills in any job settings still unset on the clone from the original function,
   * and the job output / error files from the clone's own stored field values,
   * before delegating to the parent implementation.
   */
  override def freezeFieldValues = {
    if (analysisName == null)
      analysisName = originalFunction.analysisName
    if (qSettings == null)
      qSettings = originalFunction.qSettings
    if (memoryLimit.isEmpty && originalFunction.memoryLimit.isDefined)
      memoryLimit = originalFunction.memoryLimit
    if (jobTempDir == null)
      jobTempDir = originalFunction.jobTempDir
    if (jobQueue == null)
      jobQueue = originalFunction.jobQueue
    if (jobProject == null)
      jobProject = originalFunction.jobProject
    if (jobName == null)
      jobName = originalFunction.jobName
    // The output file must be among the stored values; .get throws when it is absent.
    if (jobOutputFile == null)
      jobOutputFile = overriddenFile("jobOutputFile").get
    // The error file is optional and stays null when no value is stored for it.
    if (jobErrorFile == null)
      jobErrorFile = overriddenFile("jobErrorFile").orNull

    super.freezeFieldValues
  }

  /** The original function's command line, generated with this clone's field values applied. */
  def commandLine = withScatterPart(() => originalFunction.commandLine)

  /**
   * Returns the clone's stored value for the field. On first access of a field
   * with no stored value, the original function's value is stored and returned.
   */
  override def getFieldValue(source: ArgumentSource) = {
    overriddenFields.get(source) match {
      case None => {
        val inherited = originalFunction.getFieldValue(source)
        overriddenFields = overriddenFields + (source -> inherited)
        inherited
      }
      case Some(stored) => stored.asInstanceOf[AnyRef]
    }
  }

  /** Stores the value on the clone only; the original function is not modified. */
  override def setFieldValue(source: ArgumentSource, value: Any) = {
    overriddenFields = overriddenFields + (source -> value)
  }

  /**
   * Looks up a stored field value by the name of the underlying field.
   * @param name The name of the field to find.
   * @return The stored value cast to a File, or None when no stored field has that name.
   */
  private def overriddenFile(name: String) = {
    val matching = overriddenFields.find { case (source, _) => source.field.getName == name }
    matching.map { case (_, stored) => stored.asInstanceOf[File] }
  }
}

View File

@ -17,7 +17,7 @@ class CreateTempDirsFunction extends InProcessFunction {
@Output(doc="Temporary directories to create") @Output(doc="Temporary directories to create")
var tempDirectories: List[File] = Nil var tempDirectories: List[File] = Nil
override protected def useStatusOutput(file: File) = false override def useStatusOutput(file: File) = false
def run() = tempDirectories.foreach(_.mkdirs) def run() = tempDirectories.foreach(_.mkdirs)
} }

View File

@ -23,9 +23,9 @@ trait GatherFunction extends QFunction {
/** /**
* Sets the clone function creating one of the inputs for this gather function. * Sets the clone function creating one of the inputs for this gather function.
* @param cloneFunction The clone of the ScatterGatherableFunction. * @param cloneFunction The clone wrapper for the original ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param gatherField The field to be gathered. * @param gatherField The field to be gathered.
*/ */
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, gatherField: ArgumentSource) = {} def setCloneFunction(cloneFunction: CloneFunction, index: Int, gatherField: ArgumentSource) = {}
} }

View File

@ -26,9 +26,9 @@ trait ScatterFunction extends QFunction {
/** /**
* Sets the clone function using one of the outputs of this scatter function. * Sets the clone function using one of the outputs of this scatter function.
* @param cloneFunction The clone of the ScatterGatherableFunction. * @param cloneFunction The clone wrapper for the original ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param scatterField The field being scattered. * @param scatterField The field being scattered.
*/ */
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, scatterField: ArgumentSource) = {} def setCloneFunction(cloneFunction: CloneFunction, index: Int, scatterField: ArgumentSource) = {}
} }

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.queue.function.scattergather
import java.io.File import java.io.File
import org.broadinstitute.sting.queue.util._ import org.broadinstitute.sting.queue.util._
import org.broadinstitute.sting.commandline.ArgumentSource import org.broadinstitute.sting.commandline.ArgumentSource
import com.rits.cloning.Cloner
import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction} import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction}
/** /**
@ -59,10 +58,10 @@ trait ScatterGatherableFunction extends CommandLineFunction {
/** /**
* Allows external modification of the cloned function. * Allows external modification of the cloned function.
* @param cloneFunction The clone of this ScatterGatherableFunction * @param cloneFunction A clone wrapper of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/ */
var setupCloneFunction: PartialFunction[(ScatterGatherableFunction, Int), Unit] = _ var setupCloneFunction: PartialFunction[(CloneFunction, Int), Unit] = _
/** /**
* Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories. * Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories.
@ -112,7 +111,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
} }
// Create the clone functions for running the parallel jobs // Create the clone functions for running the parallel jobs
var cloneFunctions = List.empty[ScatterGatherableFunction] var cloneFunctions = List.empty[CloneFunction]
for (i <- 1 to this.scatterCount) { for (i <- 1 to this.scatterCount) {
val cloneFunction = this.newCloneFunction() val cloneFunction = this.newCloneFunction()
initCloneFunction(cloneFunction, i) initCloneFunction(cloneFunction, i)
@ -168,6 +167,11 @@ trait ScatterGatherableFunction extends CommandLineFunction {
protected lazy val scatterField = protected lazy val scatterField =
this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get
/**
* Retrieves the original field value for the scatter field.
*/
protected lazy val originalInput = getFieldFile(scatterField)
/** /**
* Creates a new initialized CreateTempDirsFunction that will create the temporary directories. * Creates a new initialized CreateTempDirsFunction that will create the temporary directories.
* @return A CreateTempDirsFunction that will create the temporary directories. * @return A CreateTempDirsFunction that will create the temporary directories.
@ -224,7 +228,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = { protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = {
scatterFunction.qSettings = this.qSettings scatterFunction.qSettings = this.qSettings
scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName) scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName)
scatterFunction.originalInput = this.getFieldFile(scatterField) scatterFunction.originalInput = this.originalInput
scatterFunction.setOriginalFunction(this, scatterField) scatterFunction.setOriginalFunction(this, scatterField)
if (this.setupScatterFunction != null) if (this.setupScatterFunction != null)
if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField)) if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField))
@ -270,14 +274,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
/** /**
* Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter. * Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter.
* @return A clone of this ScatterGatherableFunction * @return An uninitialized clone wrapper for ScatterGatherableFunction
*/ */
protected def newCloneFunction(): ScatterGatherableFunction = { protected def newCloneFunction() = new CloneFunction
val cloneFunction = ScatterGatherableFunction.cloner.deepClone(this)
// Make sure clone doesn't get scattered
cloneFunction.scatterCount = 1
cloneFunction
}
/** /**
* Initializes the cloned function created by newCloneFunction() by setting its commandDirectory to a temporary directory under scatterDirectory. * Initializes the cloned function created by newCloneFunction() by setting its commandDirectory to a temporary directory under scatterDirectory.
@ -285,7 +284,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param cloneFunction The clone of this ScatterGatherableFunction * @param cloneFunction The clone of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/ */
protected def initCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int) = { protected def initCloneFunction(cloneFunction: CloneFunction, index: Int) = {
cloneFunction.originalFunction = this
cloneFunction.index = index
cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index) cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index)
if (this.setupCloneFunction != null) if (this.setupCloneFunction != null)
if (this.setupCloneFunction.isDefinedAt(cloneFunction, index)) if (this.setupCloneFunction.isDefinedAt(cloneFunction, index))
@ -303,7 +304,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param cloneFunction Clone of this ScatterGatherableFunction. * @param cloneFunction Clone of this ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/ */
protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = { protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: CloneFunction, index: Int) = {
// Reset the input of the clone to the scatterGatherTempDir dir and add it as an output of the scatter // Reset the input of the clone to the scatterGatherTempDir dir and add it as an output of the scatter
val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput) val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput)
scatterFunction.scatterParts :+= scatterPart scatterFunction.scatterParts :+= scatterPart
@ -318,7 +319,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param gatherFunction Function that will create the pieces including the piece that will go to cloneFunction. * @param gatherFunction Function that will create the pieces including the piece that will go to cloneFunction.
* @param gatherField The field to be gathered. * @param gatherField The field to be gathered.
*/ */
protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = { protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: CloneFunction, index: Int) = {
val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory) val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory)
gatherFunction.gatherParts :+= gatherPart gatherFunction.gatherParts :+= gatherPart
gatherFunction.setCloneFunction(cloneFunction, index, gatherField) gatherFunction.setCloneFunction(cloneFunction, index, gatherField)
@ -362,11 +363,3 @@ trait ScatterGatherableFunction extends CommandLineFunction {
*/ */
private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-sg/" + subDir) private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-sg/" + subDir)
} }
/**
* A function that can be run faster by splitting it up into pieces and then joining together the results.
*/
object ScatterGatherableFunction {
/** Used to deep clone a ScatterGatherableFunction. */
private lazy val cloner = new Cloner
}

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.queue.util
/** /**
* bkills a list of lsf jobs. * bkills a list of lsf jobs.
*/ */
class LsfKillJob(jobs: List[LsfJob]) extends CommandLineJob with Logging { class LsfKillJob(jobs: Traversable[LsfJob]) extends CommandLineJob with Logging {
command = "bkill " + jobs.map(_.bsubJobId).mkString(" ") command = "bkill " + jobs.map(_.bsubJobId).mkString(" ")
def run() = { def run() = {

View File

@ -8,7 +8,6 @@
</filesystem> </filesystem>
<ibiblio name="libraries" m2compatible="true" /> <ibiblio name="libraries" m2compatible="true" />
<ibiblio name="libraries_with_inconsistent_poms" checkconsistency="false" m2compatible="true" /> <ibiblio name="libraries_with_inconsistent_poms" checkconsistency="false" m2compatible="true" />
<ibiblio name="ukcomrobust-it" m2compatible="true" root="http://oss.sonatype.org/service/local/repositories/ukcomrobust-it-049/content" />
<ibiblio name="reflections-repo" m2compatible="true" root="http://reflections.googlecode.com/svn/repo" /> <ibiblio name="reflections-repo" m2compatible="true" root="http://reflections.googlecode.com/svn/repo" />
</resolvers> </resolvers>
<modules> <modules>
@ -18,7 +17,6 @@
<module organisation="org.reflections" module="reflections" resolver="reflections-repo" /> <module organisation="org.reflections" module="reflections" resolver="reflections-repo" />
<module organisation="org.broad" module="tribble" resolver="projects" /> <module organisation="org.broad" module="tribble" resolver="projects" />
<module organisation="gov.nist" module="Jama" resolver="projects" /> <module organisation="gov.nist" module="Jama" resolver="projects" />
<module organisation="uk.com.robust-it" module="cloning" resolver="ukcomrobust-it" />
<!-- If colt fixes the version in the pom for 1.2.0 then this line can be removed. --> <!-- If colt fixes the version in the pom for 1.2.0 then this line can be removed. -->
<module organisation="colt" module="colt" resolver="libraries_with_inconsistent_poms" /> <module organisation="colt" module="colt" resolver="libraries_with_inconsistent_poms" />
</modules> </modules>