Wrapping ScatterGatherableFunctions with a facade instead of using slower clone library. Will require keeping Clone's facade code in sync with CommandLineFunction but runs *much* faster.

Shell invoking scripts so that even really long shell scripts make it through LSF.
Using the truncated command line (up to 1000 characters) as the job name for use with bjobs.
Switched the default from re-running everything to re-running only files that need to be regenerated.  --skip_up_to_date replaced with --start_clean for those who want to regenerate everything.
Updated logging to let users know when the scatter gather generator is running, which still takes a while but is orders of magnitude faster for large lists of functions.  (40s for a 100 function graph exploding to a 2500 function graph)


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4448 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-10-07 01:19:18 +00:00
parent 04b4adafda
commit db47230dd9
17 changed files with 221 additions and 115 deletions

View File

@ -549,7 +549,7 @@
</target>
<!-- test tribble using the unit tests set in tribble -->
<target name="tribble.test" description="runs the tribble tests" depends="tribble.compile">
<target name="tribble.test" description="runs the tribble tests" depends="tribble.compile" unless="single">
<echo message="Testing the Tribble Library..."/>
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
</target>

View File

@ -40,9 +40,6 @@
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.0" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.0" conf="scala->default"/>
<!-- Queue additional dependencies -->
<dependency org="uk.com.robust-it" name="cloning" rev="1.7.1" conf="queue->default" />
<!-- findbug dependencies -->
<dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/>
<dependency org="net.sourceforge.findbugs" name="findbugs-ant" rev="1.3.2" conf="findbugs->default"/>

View File

@ -26,8 +26,8 @@ class QCommandLine extends CommandLineProgram with Logging {
@Argument(fullName="expanded_dot_graph", shortName="expandedDot", doc="Outputs the queue graph of scatter gather to a .dot file. Otherwise overwrites the dot_graph", required=false)
private var expandedDotFile: File = _
@Argument(fullName="skip_up_to_date", shortName="skipUpToDate", doc="Does not run command line functions that don't depend on other jobs if the outputs exist and are older than the inputs.", required=false)
private var skipUpToDate = false
@Argument(fullName="start_clean", shortName="clean", doc="Runs all command line functions even if the outputs were previously output successfully.", required=false)
private var startClean = false
@Argument(fullName="for_reals", shortName="forReals", doc="Run QScripts", required=false) @Hidden
private var runScripts = false
@ -47,7 +47,7 @@ class QCommandLine extends CommandLineProgram with Logging {
val qGraph = new QGraph
qGraph.dryRun = !(run || runScripts)
qGraph.bsubAllJobs = bsubAllJobs
qGraph.skipUpToDateJobs = skipUpToDate
qGraph.startClean = startClean
qGraph.dotFile = dotFile
qGraph.expandedDotFile = expandedDotFile
qGraph.qSettings = qSettings
@ -71,10 +71,8 @@ class QCommandLine extends CommandLineProgram with Logging {
})
if ( ! getStatus ) {
logger.info("Running generated graph")
qGraph.run
} else {
logger.info("Checking pipeline status")
qGraph.checkStatus
}

View File

@ -16,19 +16,19 @@ class InProcessRunner(function: InProcessFunction) extends JobRunner with Loggin
logger.info("Starting: " + function.description)
}
function.doneOutputs.foreach(_.delete)
function.failOutputs.foreach(_.delete)
function.doneOutputs.foreach(_.delete())
function.failOutputs.foreach(_.delete())
runStatus = RunnerStatus.RUNNING
try {
function.run()
function.doneOutputs.foreach(_.createNewFile)
function.doneOutputs.foreach(_.createNewFile())
runStatus = RunnerStatus.DONE
logger.info("Done: " + function.description)
} catch {
case e => {
runStatus = RunnerStatus.FAILED
try {
function.failOutputs.foreach(_.createNewFile)
function.failOutputs.foreach(_.createNewFile())
} catch {
case _ => /* ignore errors in the exception handler */
}

View File

@ -21,6 +21,9 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
/** A temporary job done file to let Queue know that the process exited with an error. */
private lazy val jobFailFile = new File(jobStatusPath + ".fail")
/** A generated exec shell script. */
private var exec: File = _
/** A generated pre-exec shell script. */
private var preExec: File = _
@ -38,7 +41,6 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
job.errorFile = function.jobErrorFile
job.project = function.jobProject
job.queue = function.jobQueue
job.command = function.commandLine
if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory))
job.workingDir = function.commandDirectory
@ -49,10 +51,16 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
if (function.memoryLimit.isDefined)
job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
preExec = writePreExec(function)
job.name = function.commandLine.take(1000)
// TODO: Look into passing in a single chained script as recommended by Doug instead of pre, exec, and post.
exec = writeExec()
job.command = "sh " + exec
preExec = writePreExec()
job.preExecCommand = "sh " + preExec
postExec = writePostExec(function)
postExec = writePostExec()
job.postExecCommand = "sh " + postExec
if (logger.isDebugEnabled) {
@ -61,6 +69,10 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
logger.info("Starting: " + job.bsubCommand.mkString(" "))
}
function.jobOutputFile.delete()
if (function.jobErrorFile != null)
function.jobErrorFile.delete()
runStatus = RunnerStatus.RUNNING
job.run()
jobStatusPath = IOUtils.absolute(new File(function.commandDirectory, "." + job.bsubJobId)).toString
@ -98,7 +110,8 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
/**
* Removes all temporary files used for this LSF job.
*/
private def removeTemporaryFiles() = {
def removeTemporaryFiles() = {
exec.delete()
preExec.delete()
postExec.delete()
jobDoneFile.delete()
@ -115,12 +128,21 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
logger.error("Last %d lines of %s:%n%s".format(tailLines.size, errorFile, tailLines.mkString(nl)))
}
/**
* Writes an exec file containing the function's command line.
* @return the file path to the exec.
*/
private def writeExec() = {
IOUtils.writeTempFile(function.commandLine, ".exec", "", function.commandDirectory)
}
/**
* Writes a pre-exec file to cleanup any status files and
* optionally mount any automount directories on the node.
* @return the file path to the pre-exec.
*/
private def writePreExec(function: CommandLineFunction): File = {
private def writePreExec() = {
val preExec = new StringBuilder
preExec.append("rm -f '%s/'.$LSB_JOBID.done%n".format(function.commandDirectory))
@ -138,7 +160,7 @@ class LsfJobRunner(function: CommandLineFunction) extends DispatchJobRunner with
* Writes a post-exec file to create the status files.
* @return the file path to the post-exec.
*/
private def writePostExec(function: CommandLineFunction): File = {
private def writePostExec() = {
val postExec = new StringBuilder
val touchDone = function.doneOutputs.map("touch '%s'%n".format(_)).mkString

View File

@ -10,9 +10,9 @@ import org.jgrapht.ext.DOTExporter
import java.io.File
import org.jgrapht.event.{TraversalListenerAdapter, EdgeTraversalEvent}
import org.broadinstitute.sting.queue.{QSettings, QException}
import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, ScatterGatherableFunction}
import org.broadinstitute.sting.queue.function.{InProcessFunction, CommandLineFunction, QFunction}
import org.broadinstitute.sting.queue.util.{JobExitException, LsfKillJob, Logging}
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, GatherFunction, ScatterGatherableFunction}
/**
* The internal dependency tracker between sets of function input and output files.
@ -20,7 +20,7 @@ import org.broadinstitute.sting.queue.util.{JobExitException, LsfKillJob, Loggin
class QGraph extends Logging {
var dryRun = true
var bsubAllJobs = false
var skipUpToDateJobs = false
var startClean = false
var dotFile: File = _
var expandedDotFile: File = _
var qSettings: QSettings = _
@ -42,15 +42,6 @@ class QGraph extends Logging {
}
}
private def scatterGatherable(edge: FunctionEdge) = {
edge.function match {
case scatterGather: ScatterGatherableFunction if (scatterGather.scatterGatherable) => true
case _ => false
}
}
/**
* Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph.
*/
@ -59,6 +50,7 @@ class QGraph extends Logging {
val isReady = numMissingValues == 0
if (isReady || this.dryRun) {
logger.info("Running jobs.")
runJobs()
}
@ -73,31 +65,34 @@ class QGraph extends Logging {
}
private def fillGraph = {
logger.info("Generating graph.")
fill
if (dotFile != null)
renderToDot(dotFile)
var numMissingValues = validate
if (numMissingValues == 0 && bsubAllJobs) {
logger.debug("Scatter gathering jobs.")
var scatterGathers = List.empty[FunctionEdge]
loop({
case edge: FunctionEdge if (scatterGatherable(edge)) =>
scatterGathers :+= edge
})
logger.info("Generating scatter gather jobs.")
val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
var addedFunctions = List.empty[QFunction]
for (scatterGather <- scatterGathers) {
val functions = scatterGather.function.asInstanceOf[ScatterGatherableFunction].generateFunctions()
val functions = scatterGather.asInstanceOf[FunctionEdge]
.function.asInstanceOf[ScatterGatherableFunction]
.generateFunctions()
if (this.debugMode)
logger.debug("Scattered into %d parts: %n%s".format(functions.size, functions.mkString("%n".format())))
addedFunctions ++= functions
}
logger.info("Removing original jobs.")
this.jobGraph.removeAllEdges(scatterGathers)
prune
logger.info("Adding scatter gather jobs.")
addedFunctions.foreach(this.add(_))
logger.info("Regenerating graph.")
fill
val scatterGatherDotFile = if (expandedDotFile != null) expandedDotFile else dotFile
if (scatterGatherDotFile != null)
@ -108,9 +103,22 @@ class QGraph extends Logging {
numMissingValues
}
private def scatterGatherable(edge: QEdge) = {
edge match {
case functionEdge: FunctionEdge => {
functionEdge.function match {
case scatterGather: ScatterGatherableFunction if (scatterGather.scatterGatherable) => true
case _ => false
}
}
case _ => false
}
}
def checkStatus = {
// build up the full DAG with scatter-gather jobs
fillGraph
logger.info("Checking pipeline status.")
logStatus
}
@ -161,25 +169,18 @@ class QGraph extends Logging {
}
private def getReadyJobs = {
var readyJobs = List.empty[FunctionEdge]
loop({
case f: FunctionEdge => {
if (this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) && f.status == RunnerStatus.PENDING)
readyJobs :+= f
}
})
readyJobs
jobGraph.edgeSet.filter{
case f: FunctionEdge =>
this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) && f.status == RunnerStatus.PENDING
case _ => false
}.map(_.asInstanceOf[FunctionEdge])
}
private def getRunningJobs = {
var runningJobs = List.empty[FunctionEdge]
loop({
case f: FunctionEdge => {
if (f.status == RunnerStatus.RUNNING)
runningJobs :+= f
}
})
runningJobs
jobGraph.edgeSet.filter{
case f: FunctionEdge => f.status == RunnerStatus.RUNNING
case _ => false
}.map(_.asInstanceOf[FunctionEdge])
}
/**
@ -232,13 +233,13 @@ class QGraph extends Logging {
* Runs the jobs by traversing the graph.
*/
private def runJobs() = {
loop({ case f: FunctionEdge => {
val isDone = this.skipUpToDateJobs &&
foreachFunction(f => {
val isDone = !this.startClean &&
f.status == RunnerStatus.DONE &&
this.previousFunctions(f).forall(_.status == RunnerStatus.DONE)
if (!isDone)
f.resetPending()
}})
})
var readyJobs = getReadyJobs
var runningJobs = Set.empty[FunctionEdge]
@ -305,8 +306,8 @@ class QGraph extends Logging {
*/
private def logStatus = {
var statuses = Map.empty[String, AnalysisStatus]
loop({
case edgeCLF: FunctionEdge if (edgeCLF.function.analysisName != null) =>
foreachFunction(edgeCLF => {
if (edgeCLF.function.analysisName != null) {
updateStatus(statuses.get(edgeCLF.function.analysisName) match {
case Some(status) => status
case None =>
@ -314,6 +315,7 @@ class QGraph extends Logging {
statuses += edgeCLF.function.analysisName -> status
status
}, edgeCLF)
}
})
statuses.values.toList.sortBy(_.analysisName).foreach(status => {
@ -343,7 +345,7 @@ class QGraph extends Logging {
private def updateStatus(stats: AnalysisStatus, edge: FunctionEdge) = {
if (edge.function.isInstanceOf[GatherFunction]) {
updateSGStatus(stats.gather, edge)
} else if (edge.function.isInstanceOf[ScatterGatherableFunction]) {
} else if (edge.function.isInstanceOf[CloneFunction]) {
updateSGStatus(stats.scatter, edge)
} else {
stats.status = edge.status
@ -456,19 +458,14 @@ class QGraph extends Logging {
(jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0
/**
* Utility function for looping over the internal graph and running functions.
* @param edgeFunction Optional function to run for each edge visited.
* @param nodeFunction Optional function to run for each node visited.
* Utility function for running a method over all function edges.
* @param edgeFunction Function to run for each FunctionEdge.
*/
private def loop(edgeFunction: PartialFunction[QEdge, Unit] = null, nodeFunction: PartialFunction[QNode, Unit] = null) = {
val iterator = new TopologicalOrderIterator(this.jobGraph)
iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QEdge] {
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QEdge]) = event.getEdge match {
case cmd: FunctionEdge => if (edgeFunction != null && edgeFunction.isDefinedAt(cmd)) edgeFunction(cmd)
case map: MappingEdge => /* do nothing for mapping functions */
}
})
iterator.foreach(node => if (nodeFunction != null && nodeFunction.isDefinedAt(node)) nodeFunction(node))
private def foreachFunction(f: (FunctionEdge) => Unit) = {
jobGraph.edgeSet.foreach{
case functionEdge: FunctionEdge => f(functionEdge)
case _ =>
}
}
/**
@ -482,7 +479,7 @@ class QGraph extends Logging {
// todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
// todo -- visualizeString() command, or something that shows inputs / outputs
val ve = new org.jgrapht.ext.EdgeNameProvider[QEdge] {
def getEdgeName(function: QEdge) = if (function.dotString == null) "" else function.dotString.replace("\"", "\\\"")
def getEdgeName(function: QEdge) = if (function.dotString == null) "" else function.dotString.replace("\"", "\\\"")
}
//val iterator = new TopologicalOrderIterator(qGraph.jobGraph)
@ -505,11 +502,11 @@ class QGraph extends Logging {
* Kills any forked jobs still running.
*/
def shutdown() {
val lsfJobs = getRunningJobs.filter(_.runner.isInstanceOf[LsfJobRunner]).map(_.runner.asInstanceOf[LsfJobRunner].job)
if (lsfJobs.size > 0) {
for (jobs <- lsfJobs.grouped(10)) {
val lsfJobRunners = getRunningJobs.filter(_.runner.isInstanceOf[LsfJobRunner]).map(_.runner.asInstanceOf[LsfJobRunner])
if (lsfJobRunners.size > 0) {
for (jobRunners <- lsfJobRunners.filterNot(_.job.bsubJobId == null).grouped(10)) {
try {
val bkill = new LsfKillJob(jobs)
val bkill = new LsfKillJob(jobRunners.map(_.job))
logger.info(bkill.command)
bkill.run()
} catch {
@ -518,6 +515,11 @@ class QGraph extends Logging {
case e =>
logger.error("Unable to kill jobs.", e)
}
try {
jobRunners.foreach(_.removeTemporaryFiles())
} catch {
case e => /* ignore */
}
}
}
}

View File

@ -34,19 +34,22 @@ class ShellJobRunner(function: CommandLineFunction) extends JobRunner with Loggi
logger.info("Errors also written to " + function.jobOutputFile)
}
function.doneOutputs.foreach(_.delete)
function.failOutputs.foreach(_.delete)
function.jobOutputFile.delete()
if (function.jobErrorFile != null)
function.jobErrorFile.delete()
function.doneOutputs.foreach(_.delete())
function.failOutputs.foreach(_.delete())
runStatus = RunnerStatus.RUNNING
try {
job.run()
function.doneOutputs.foreach(_.createNewFile)
function.doneOutputs.foreach(_.createNewFile())
runStatus = RunnerStatus.DONE
logger.info("Done: " + function.commandLine)
} catch {
case e: JobExitException =>
runStatus = RunnerStatus.FAILED
try {
function.failOutputs.foreach(_.createNewFile)
function.failOutputs.foreach(_.createNewFile())
} catch {
case _ => /* ignore errors in the exception handler */
}

View File

@ -57,7 +57,7 @@ trait CommandLineFunction extends QFunction with Logging {
dirs
}
override protected def useStatusOutput(file: File) =
override def useStatusOutput(file: File) =
file != jobOutputFile && file != jobErrorFile
override def description = commandLine

View File

@ -7,6 +7,6 @@ import java.io.File
*/
trait InProcessFunction extends QFunction {
def run()
protected def useStatusOutput(file: File) = true
def useStatusOutput(file: File) = true
def description = this.getClass.getSimpleName
}

View File

@ -34,7 +34,11 @@ trait QFunction {
*/
def dotString = ""
protected def useStatusOutput(file: File): Boolean
/**
* Returns true if the file should be used for status output.
* @return true if the file should be used for status output.
*/
def useStatusOutput(file: File): Boolean
/**
* Returns the output files for this function.
@ -57,7 +61,7 @@ trait QFunction {
def failOutputs = statusPaths.map(path => new File(path + ".fail"))
/** The complete list of fields on this CommandLineFunction. */
lazy val functionFields: List[ArgumentSource] = ParsingEngine.extractArgumentSources(this.getClass).toList
lazy val functionFields: List[ArgumentSource] = initFunctionFields
/** The @Input fields on this CommandLineFunction. */
lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input]))
/** The @Output fields on this CommandLineFunction. */
@ -65,6 +69,11 @@ trait QFunction {
/** The @Argument fields on this CommandLineFunction. */
lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument]))
/**
* Called at most once, returns the list of fields for this function.
*/
protected def initFunctionFields = ParsingEngine.extractArgumentSources(this.getClass).toList
/**
* Returns the input files for this function.
* @return Set[File] inputs for this function.

View File

@ -0,0 +1,84 @@
package org.broadinstitute.sting.queue.function.scattergather

import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline.ArgumentSource
import java.io.File

/**
 * Shadow clones another command line function.
 *
 * Instead of deep-copying a ScatterGatherableFunction (the slow clone-library
 * approach), this facade delegates to the original function and keeps only a
 * map of per-clone field overrides, applying them temporarily whenever the
 * command line is generated.
 */
class CloneFunction extends CommandLineFunction {
  // The ScatterGatherableFunction this clone delegates to.
  var originalFunction: ScatterGatherableFunction = _
  // One-based index of this scatter piece (1..scatterCount).
  var index: Int = _

  // Field values overridden on this clone. Reads fall through to the
  // original function and are cached here (see getFieldValue).
  private var overriddenFields = Map.empty[ArgumentSource, Any]

  /**
   * Temporarily applies this clone's overridden field values onto the
   * original function, runs f, then restores the original values.
   * NOTE(review): mutates originalFunction while f runs, so this is not
   * safe for concurrent use of the same original function.
   * @param f Function to run while the overrides are in effect.
   * @return the result of f.
   */
  private def withScatterPart[A](f: () => A): A = {
    var originalValues = Map.empty[ArgumentSource, Any]
    overriddenFields.foreach{
      case (field, overrideValue) => {
        // Remember the original value before clobbering it with the override.
        originalValues += field -> originalFunction.getFieldValue(field)
        originalFunction.setFieldValue(field, overrideValue)
      }
    }
    try {
      f()
    } finally {
      // Restore the original values even if f throws.
      originalValues.foreach{
        case (name, value) =>
          originalFunction.setFieldValue(name, value)
      }
    }
  }

  // Delegate display strings to the original function.
  override def dotString = originalFunction.dotString

  override def description = originalFunction.description

  // Share the original function's reflected field list instead of
  // re-extracting argument sources for every clone.
  override protected def initFunctionFields = originalFunction.functionFields

  // A file is a status output only if the original also treats it as one,
  // and it is not this clone's own job output/error file.
  override def useStatusOutput(file: File) =
    file != jobOutputFile && file != jobErrorFile && originalFunction.useStatusOutput(file)

  /**
   * Fills in any job settings not explicitly set on this clone from the
   * original function, then freezes the field values.
   */
  override def freezeFieldValues = {
    if (this.analysisName == null)
      this.analysisName = originalFunction.analysisName
    if (this.qSettings == null)
      this.qSettings = originalFunction.qSettings
    if (this.memoryLimit.isEmpty && originalFunction.memoryLimit.isDefined)
      this.memoryLimit = originalFunction.memoryLimit
    if (this.jobTempDir == null)
      this.jobTempDir = originalFunction.jobTempDir
    if (this.jobQueue == null)
      this.jobQueue = originalFunction.jobQueue
    if (this.jobProject == null)
      this.jobProject = originalFunction.jobProject
    if (this.jobName == null)
      this.jobName = originalFunction.jobName
    if (this.jobOutputFile == null)
      // assumes jobOutputFile was overridden before freezing; .get throws
      // if not -- TODO(review) confirm callers always set it
      this.jobOutputFile = overriddenFile("jobOutputFile").get
    if (this.jobErrorFile == null)
      // jobErrorFile is optional: null when no override was recorded
      this.jobErrorFile = overriddenFile("jobErrorFile").getOrElse(null)
    super.freezeFieldValues
  }

  /** Generates the command line with this clone's overrides applied. */
  def commandLine = withScatterPart(() => originalFunction.commandLine)

  /**
   * Returns the overridden value for source if present; otherwise reads the
   * value from the original function and caches it as an override so later
   * reads are stable even if the original changes.
   */
  override def getFieldValue(source: ArgumentSource) = {
    overriddenFields.get(source) match {
      case Some(value) => value.asInstanceOf[AnyRef]
      case None => {
        val value = originalFunction.getFieldValue(source)
        overriddenFields += source -> value
        value
      }
    }
  }

  /** Records an override for source without touching the original function. */
  override def setFieldValue(source: ArgumentSource, value: Any) = {
    overriddenFields += source -> value
  }

  /**
   * Looks up an overridden File value by field name.
   * @return Some(file) if an override was recorded for that field, else None.
   */
  private def overriddenFile(name: String) = {
    overriddenFields
      .find{case (key, _) => key.field.getName == name}
      .map{case (_, value) => value.asInstanceOf[File]}
  }
}

View File

@ -17,7 +17,7 @@ class CreateTempDirsFunction extends InProcessFunction {
@Output(doc="Temporary directories to create")
var tempDirectories: List[File] = Nil
override protected def useStatusOutput(file: File) = false
override def useStatusOutput(file: File) = false
def run() = tempDirectories.foreach(_.mkdirs)
}

View File

@ -23,9 +23,9 @@ trait GatherFunction extends QFunction {
/**
* Sets the clone function creating one of the inputs for this gather function.
* @param cloneFunction The clone of the ScatterGatherableFunction.
* @param cloneFunction The clone wrapper for the original ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param gatherField The field to be gathered.
*/
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, gatherField: ArgumentSource) = {}
def setCloneFunction(cloneFunction: CloneFunction, index: Int, gatherField: ArgumentSource) = {}
}

View File

@ -26,9 +26,9 @@ trait ScatterFunction extends QFunction {
/**
* Sets the clone function using one of the outputs of this scatter function.
* @param cloneFunction The clone of the ScatterGatherableFunction.
* @param cloneFunction The clone wrapper for the original ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param scatterField The field being scattered.
*/
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, scatterField: ArgumentSource) = {}
def setCloneFunction(cloneFunction: CloneFunction, index: Int, scatterField: ArgumentSource) = {}
}

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.queue.util._
import org.broadinstitute.sting.commandline.ArgumentSource
import com.rits.cloning.Cloner
import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction}
/**
@ -59,10 +58,10 @@ trait ScatterGatherableFunction extends CommandLineFunction {
/**
* Allows external modification of the cloned function.
* @param cloneFunction The clone of this ScatterGatherableFunction
* @param cloneFunction A clone wrapper of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
var setupCloneFunction: PartialFunction[(ScatterGatherableFunction, Int), Unit] = _
var setupCloneFunction: PartialFunction[(CloneFunction, Int), Unit] = _
/**
* Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories.
@ -112,7 +111,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
}
// Create the clone functions for running the parallel jobs
var cloneFunctions = List.empty[ScatterGatherableFunction]
var cloneFunctions = List.empty[CloneFunction]
for (i <- 1 to this.scatterCount) {
val cloneFunction = this.newCloneFunction()
initCloneFunction(cloneFunction, i)
@ -168,6 +167,11 @@ trait ScatterGatherableFunction extends CommandLineFunction {
protected lazy val scatterField =
this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get
/**
* Retrieves the original field value for the scatter field.
*/
protected lazy val originalInput = getFieldFile(scatterField)
/**
* Creates a new initialize CreateTempDirsFunction that will create the temporary directories.
* @return A CreateTempDirsFunction that will create the temporary directories.
@ -224,7 +228,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = {
scatterFunction.qSettings = this.qSettings
scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName)
scatterFunction.originalInput = this.getFieldFile(scatterField)
scatterFunction.originalInput = this.originalInput
scatterFunction.setOriginalFunction(this, scatterField)
if (this.setupScatterFunction != null)
if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField))
@ -270,14 +274,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
/**
* Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter.
* @return A clone of this ScatterGatherableFunction
* @return An uninitialized clone wrapper for ScatterGatherableFunction
*/
protected def newCloneFunction(): ScatterGatherableFunction = {
val cloneFunction = ScatterGatherableFunction.cloner.deepClone(this)
// Make sure clone doesn't get scattered
cloneFunction.scatterCount = 1
cloneFunction
}
protected def newCloneFunction() = new CloneFunction
/**
* Initializes the cloned function created by newCloneFunction() by setting it's commandDirectory to a temporary directory under scatterDirectory.
@ -285,7 +284,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param cloneFunction The clone of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
protected def initCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int) = {
protected def initCloneFunction(cloneFunction: CloneFunction, index: Int) = {
cloneFunction.originalFunction = this
cloneFunction.index = index
cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index)
if (this.setupCloneFunction != null)
if (this.setupCloneFunction.isDefinedAt(cloneFunction, index))
@ -303,7 +304,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param cloneFunction Clone of this ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: CloneFunction, index: Int) = {
// Reset the input of the clone to the the scatterGatherTempDir dir and add it as an output of the scatter
val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput)
scatterFunction.scatterParts :+= scatterPart
@ -318,7 +319,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
* @param gatherFunction Function that will create the pieces including the piece that will go to cloneFunction.
* @param gatherField The field to be gathered.
*/
protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: CloneFunction, index: Int) = {
val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory)
gatherFunction.gatherParts :+= gatherPart
gatherFunction.setCloneFunction(cloneFunction, index, gatherField)
@ -362,11 +363,3 @@ trait ScatterGatherableFunction extends CommandLineFunction {
*/
private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-sg/" + subDir)
}
/**
* A function that can be run faster by splitting it up into pieces and then joining together the results.
*/
object ScatterGatherableFunction {
/** Used to deep clone a ScatterGatherableFunction. */
private lazy val cloner = new Cloner
}

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.queue.util
/**
* bkills a list of lsf jobs.
*/
class LsfKillJob(jobs: List[LsfJob]) extends CommandLineJob with Logging {
class LsfKillJob(jobs: Traversable[LsfJob]) extends CommandLineJob with Logging {
command = "bkill " + jobs.map(_.bsubJobId).mkString(" ")
def run() = {

View File

@ -8,7 +8,6 @@
</filesystem>
<ibiblio name="libraries" m2compatible="true" />
<ibiblio name="libraries_with_inconsistent_poms" checkconsistency="false" m2compatible="true" />
<ibiblio name="ukcomrobust-it" m2compatible="true" root="http://oss.sonatype.org/service/local/repositories/ukcomrobust-it-049/content" />
<ibiblio name="reflections-repo" m2compatible="true" root="http://reflections.googlecode.com/svn/repo" />
</resolvers>
<modules>
@ -18,7 +17,6 @@
<module organisation="org.reflections" module="reflections" resolver="reflections-repo" />
<module organisation="org.broad" module="tribble" resolver="projects" />
<module organisation="gov.nist" module="Jama" resolver="projects" />
<module organisation="uk.com.robust-it" module="cloning" resolver="ukcomrobust-it" />
<!-- If colt fixes the version in the pom for 1.2.0 then this line can be removed. -->
<module organisation="colt" module="colt" resolver="libraries_with_inconsistent_poms" />
</modules>