2010-06-15 12:43:46 +08:00
|
|
|
package org.broadinstitute.sting.queue.engine
|
|
|
|
|
|
2010-07-16 06:32:48 +08:00
|
|
|
import org.jgrapht.traverse.TopologicalOrderIterator
|
2010-06-15 12:43:46 +08:00
|
|
|
import org.jgrapht.graph.SimpleDirectedGraph
|
|
|
|
|
import scala.collection.JavaConversions
|
2010-06-23 02:39:20 +08:00
|
|
|
import scala.collection.JavaConversions._
|
|
|
|
|
import org.jgrapht.alg.CycleDetector
|
|
|
|
|
import org.jgrapht.EdgeFactory
|
2010-07-16 06:32:48 +08:00
|
|
|
import org.jgrapht.ext.DOTExporter
|
2010-08-10 00:42:48 +08:00
|
|
|
import java.io.File
|
2010-08-12 05:58:26 +08:00
|
|
|
import org.jgrapht.event.{TraversalListenerAdapter, EdgeTraversalEvent}
|
2010-10-23 06:22:30 +08:00
|
|
|
import org.broadinstitute.sting.queue.QException
|
2010-10-07 02:29:56 +08:00
|
|
|
import org.broadinstitute.sting.queue.function.{InProcessFunction, CommandLineFunction, QFunction}
|
2010-10-07 09:19:18 +08:00
|
|
|
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, GatherFunction, ScatterGatherableFunction}
|
2010-10-14 23:58:52 +08:00
|
|
|
import org.broadinstitute.sting.queue.util.{EmailMessage, JobExitException, LsfKillJob, Logging}
|
2010-10-19 06:11:14 +08:00
|
|
|
import org.apache.commons.lang.StringUtils
|
2010-06-15 12:43:46 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
|
|
|
|
|
* The internal dependency tracker between sets of function input and output files.
|
|
|
|
|
*/
|
2010-06-15 12:43:46 +08:00
|
|
|
class QGraph extends Logging {
|
2010-10-23 06:22:30 +08:00
|
|
|
/** Settings read by the methods of this graph. Left uninitialized here. */
var settings: QGraphSettings = _

/** When true, fillGraph logs the functions generated for each scatter/gather job at debug level. */
var debugMode: Boolean = false

/** True when settings.run is not set. */
private def dryRun: Boolean = !settings.run

/** The graph of jobs, created by newGraph. */
private val jobGraph = newGraph

/** Checked by the runJobs loop; once true the loop stops iterating. */
private var shuttingDown: Boolean = false

/** Line separator as produced by the %n format specifier. */
private val nl: String = "%n".format()
|
2010-06-15 12:43:46 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Adds a QScript created function to the graph.
 *
 * The function receives this graph's QSettings and is frozen before it is wrapped in a
 * FunctionEdge and added as an edge.
 *
 * @param command Function to add to the graph.
 * @throws QException wrapping any Exception raised while preparing or adding the function.
 */
def add(command: QFunction): Unit = {
  try {
    command.qSettings = settings.qSettings
    command.freeze
    val functionEdge = new FunctionEdge(command)
    addEdge(functionEdge)
  } catch {
    case cause: Exception =>
      throw new QException("Error adding function: " + command, cause)
  }
}
|
|
|
|
|
|
2010-08-12 05:58:26 +08:00
|
|
|
/**
 * Checks the functions for missing values and the graph for cyclic dependencies and then,
 * depending on the settings, reports status, dry-runs, or runs the functions in the graph.
 *
 * Exactly one mode is chosen, in this order of precedence:
 *  - settings.getStatus: log the pipeline status only.
 *  - dry run (settings.run not set): log what would be run.
 *  - otherwise, and only when no values are missing: run the jobs.
 *
 * The total number of missing values, if any, is logged as an error in every mode.
 */
def run = {
  val numMissingValues = fillGraph
  val isReady = numMissingValues == 0

  if (settings.getStatus) {
    logger.info("Checking pipeline status.")
    logStatus()
  } else if (this.dryRun) {
    dryRunJobs()
    // Only claim a successful dry run when one was actually performed. A status check
    // also has settings.run unset and must not print this message.
    if (isReady) {
      logger.info("Dry run completed successfully!")
      logger.info("Re-run with \"-run\" to execute the functions.")
    }
  } else if (isReady) {
    logger.info("Running jobs.")
    runJobs()
  }

  if (numMissingValues > 0) {
    logger.error("Total missing values: " + numMissingValues)
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds and validates the job graph, expanding scatter/gatherable functions when the
 * first validation finds no missing values and settings.bsubAllJobs is set.
 *
 * When expansion happens, the original scatter/gatherable edges are removed, the generated
 * functions are added in their place, and the graph is filled and validated again.
 * Dot files are rendered after each fill when the corresponding setting is non-null.
 *
 * @return Number of missing values reported by the most recent validation.
 */
private def fillGraph = {
  logger.info("Generating graph.")
  fill
  if (settings.dotFile != null)
    renderToDot(settings.dotFile)
  var missingValueCount = validate

  if (missingValueCount == 0 && settings.bsubAllJobs) {
    logger.info("Generating scatter gather jobs.")
    val originalEdges = jobGraph.edgeSet.filter(scatterGatherable(_))

    // Generate every replacement before touching the graph.
    var replacements = List.empty[QFunction]
    originalEdges.foreach(originalEdge => {
      val generated = originalEdge.asInstanceOf[FunctionEdge]
        .function.asInstanceOf[ScatterGatherableFunction]
        .generateFunctions()
      if (this.debugMode)
        logger.debug("Scattered into %d parts: %n%s".format(generated.size, generated.mkString(nl)))
      replacements ++= generated
    })

    logger.info("Removing original jobs.")
    this.jobGraph.removeAllEdges(originalEdges)
    prune

    logger.info("Adding scatter gather jobs.")
    replacements.foreach(this.add(_))

    logger.info("Regenerating graph.")
    fill
    // Prefer the dedicated expanded dot file; otherwise overwrite the regular one.
    val expandedDot =
      if (settings.expandedDotFile != null) settings.expandedDotFile
      else settings.dotFile
    if (expandedDot != null)
      renderToDot(expandedDot)
    missingValueCount = validate
  }

  missingValueCount
}
|
|
|
|
|
|
2010-10-07 09:19:18 +08:00
|
|
|
/**
 * Tests whether an edge wraps a function that should be scattered and gathered.
 *
 * @param edge Edge to examine.
 * @return true only for a FunctionEdge whose function is a ScatterGatherableFunction
 *         reporting scatterGatherable; false for every other edge.
 */
private def scatterGatherable(edge: QEdge) = edge match {
  case candidate: FunctionEdge =>
    candidate.function match {
      case scatterGather: ScatterGatherableFunction => scatterGather.scatterGatherable
      case _ => false
    }
  case _ => false
}
|
|
|
|
|
|
2010-08-12 05:58:26 +08:00
|
|
|
/**
 * Walks up the graph looking for the previous function edges.
 *
 * Every edge entering the source node of the given edge is examined. A FunctionEdge is
 * returned as-is and ends the walk along that path; for any other kind of edge the walk
 * continues recursively from that edge.
 *
 * @param edge Edge to examine for preceding function edges.
 * @return A list of prior function edges.
 */
private def previousFunctions(edge: QEdge): List[FunctionEdge] = {
  val sourceNode = this.jobGraph.getEdgeSource(edge)
  this.jobGraph.incomingEdgesOf(sourceNode).toList.flatMap {
    // Stop recursing when we find a function along the edge
    case functionEdge: FunctionEdge => List(functionEdge)
    // For any other type of edge find the functions preceding the edge
    case other: QEdge => previousFunctions(other)
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fills in the graph using mapping functions, then cleans up mapping
 * functions and nodes that aren't needed.
 *
 * @return The result of the final prune.
 */
private def fill = {
  this.fillIn
  this.prune
}
|
|
|
|
|
|
2010-06-15 12:43:46 +08:00
|
|
|
/**
 * Looks through functions with multiple inputs and outputs and adds mapping functions for
 * single inputs and outputs.
 *
 * Each function edge has its outputs and then its inputs passed to addCollectionOutputs
 * and addCollectionInputs. Mapping edges are left untouched.
 */
private def fillIn = {
  // Work on a clone since edgeSet is backed by the graph.
  val snapshot = JavaConversions.asSet(jobGraph.edgeSet).clone
  for (edge <- snapshot) edge match {
    case cmd: FunctionEdge =>
      addCollectionOutputs(cmd.outputs)
      addCollectionInputs(cmd.inputs)
    case map: MappingEdge => /* do nothing for mapping edges */
  }
}
|
2010-06-15 12:43:46 +08:00
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
/**
 * Finds the function edges that can be started now.
 *
 * A function edge is ready when every previous function edge is DONE and the edge itself
 * is PENDING.
 *
 * @return The ready function edges, sorted with compare.
 */
private def getReadyJobs = {
  val readyEdges = jobGraph.edgeSet.filter {
    case candidate: FunctionEdge =>
      this.previousFunctions(candidate).forall(_.status == RunnerStatus.DONE) &&
        candidate.status == RunnerStatus.PENDING
    case _ => false
  }
  readyEdges.map(_.asInstanceOf[FunctionEdge]).toList.sortWith(compare(_,_))
}
|
|
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
/**
 * Finds the function edges whose status is RUNNING.
 *
 * @return The running function edges, sorted with compare.
 */
private def getRunningJobs = {
  val runningEdges = jobGraph.edgeSet.filter {
    case candidate: FunctionEdge => candidate.status == RunnerStatus.RUNNING
    case _ => false
  }
  runningEdges.map(_.asInstanceOf[FunctionEdge]).toList.sortWith(compare(_,_))
}
|
|
|
|
|
|
|
|
|
|
/**
 * Removes mapping edges that aren't being used, and nodes that don't belong to anything.
 *
 * Filler edges are removed repeatedly until a pass finds none, since removing one batch
 * can turn further edges into filler. Orphaned vertices are removed afterwards.
 *
 * @return The result of removing the orphaned vertices from the graph.
 */
private def prune = {
  var filler = jobGraph.edgeSet.filter(isFiller(_))
  while (filler.size > 0) {
    jobGraph.removeAllEdges(filler)
    filler = jobGraph.edgeSet.filter(isFiller(_))
  }

  val orphans = jobGraph.vertexSet.filter(isOrphan(_))
  jobGraph.removeAllVertices(orphans)
}
|
|
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Validates that the functions in the graph have no missing values and that there are no cycles.
 *
 * Missing values are logged per function as errors and counted. Cycles are logged and
 * then reported by throwing.
 *
 * @return Number of missing values.
 * @throws QException if the graph contains cycles.
 */
private def validate = {
  var missingTotal = 0
  for (edge <- JavaConversions.asSet(jobGraph.edgeSet)) edge match {
    case cmd: FunctionEdge =>
      val missingFieldValues = cmd.function.missingFields
      val missingCount = missingFieldValues.size
      if (missingCount > 0) {
        missingTotal += missingCount
        logger.error("Missing %s values for function: %s".format(missingCount, cmd.function.description))
        missingFieldValues.foreach(missing => logger.error(" " + missing))
      }
    case map: MappingEdge => /* do nothing for mapping edges */
  }

  val cycleDetector = new CycleDetector(jobGraph)
  if (cycleDetector.detectCycles) {
    logger.error("Cycles were detected in the graph:")
    cycleDetector.findCycles.foreach(cycle => logger.error(" " + cycle))
    throw new QException("Cycles were detected in the graph.")
  }

  missingTotal
}
|
2010-08-10 00:42:48 +08:00
|
|
|
|
2010-10-08 03:08:02 +08:00
|
|
|
/**
 * Dry-runs the jobs by traversing the graph.
 *
 * Edge statuses are refreshed first, without cleaning outputs, and every function edge
 * visited by the traversal is then logged.
 */
private def dryRunJobs() = {
  updateGraphStatus(false)
  traverseFunctions(functionEdge => logEdge(functionEdge))
}
|
|
|
|
|
|
|
|
|
|
/**
 * Logs a function edge: its status and description, the command directory at debug
 * level, and the paths of its output log and, when set, its error log.
 *
 * @param edge Function edge to log.
 */
private def logEdge(edge: FunctionEdge) = {
  val function = edge.function
  logger.info("-------")
  logger.info(StringUtils.capitalize(edge.status.toString) + ": " + function.description)
  if (logger.isDebugEnabled)
    logger.debug(function.commandDirectory + " > " + function.description)
  logger.info("Log: " + function.jobOutputFile.getAbsolutePath)
  // The error file is optional.
  if (function.jobErrorFile != null)
    logger.info("Error: " + function.jobErrorFile.getAbsolutePath)
}
|
|
|
|
|
|
2010-10-23 06:22:30 +08:00
|
|
|
/**
 * Logs job statuses: refreshes the edge statuses without cleaning outputs, then logs each
 * status line produced by doStatus at info level.
 */
private def logStatus() = {
  updateGraphStatus(false)
  doStatus(statusLine => logger.info(statusLine))
}
|
|
|
|
|
|
2010-08-12 05:58:26 +08:00
|
|
|
/**
 * Runs the jobs by traversing the graph.
 *
 * Functions are first reset to pending (when settings.startFromScratch is set) or have
 * their status refreshed. Each pass of the loop then:
 *  1. moves jobs that have exited out of the running set,
 *  2. creates and starts a runner for every ready job,
 *  3. reports failed jobs and checks whether they may be retried,
 *  4. sleeps 30 seconds when nothing was ready but jobs are still running,
 *  5. recomputes the ready jobs.
 * The loop ends when nothing is ready or running, or when shuttingDown is set.
 * Intermediate outputs are deleted afterwards. Any error is logged and rethrown, and
 * emailStatus is invoked in all cases.
 */
private def runJobs() = {
  try {
    if (settings.startFromScratch) {
      logger.info("Removing outputs from previous runs.")
      foreachFunction(_.resetToPending(true))
    } else {
      updateGraphStatus(true)
    }

    var readyJobs = getReadyJobs
    var runningJobs = Set.empty[FunctionEdge]
    while (!shuttingDown && readyJobs.size + runningJobs.size > 0) {
      var exitedJobs = List.empty[FunctionEdge]
      var failedJobs = List.empty[FunctionEdge]

      // NOTE(review): statuses other than RUNNING/FAILED/DONE are not matched here.
      for (job <- runningJobs) {
        job.status match {
          case RunnerStatus.RUNNING => /* do nothing while still running */
          case RunnerStatus.FAILED =>
            exitedJobs :+= job
            failedJobs :+= job
          case RunnerStatus.DONE =>
            exitedJobs :+= job
        }
      }
      for (job <- exitedJobs)
        runningJobs -= job

      for (job <- readyJobs) {
        job.runner = newRunner(job.function)
        job.runner.start()
        job.status match {
          case RunnerStatus.RUNNING => runningJobs += job
          case RunnerStatus.FAILED => failedJobs :+= job
          case RunnerStatus.DONE => /* do nothing and move on */
        }
      }

      if (failedJobs.size > 0) {
        emailFailedJobs(failedJobs)
        checkRetryJobs(failedJobs)
      }

      // Nothing new was started this pass: wait before polling the running jobs again.
      if (readyJobs.size == 0 && runningJobs.size > 0)
        Thread.sleep(30000L)
      readyJobs = getReadyJobs
    }

    deleteIntermediateOutputs()
  } catch {
    case error: Throwable =>
      logger.error("Uncaught error running jobs.", error)
      throw error
  } finally {
    emailStatus()
  }
}
|
|
|
|
|
|
2010-10-21 05:43:52 +08:00
|
|
|
/**
 * Updates the status of edges in the graph by running checkDone on every function edge
 * visited by traverseFunctions.
 * @param cleanOutputs If true will delete outputs when setting edges to pending.
 */
private def updateGraphStatus(cleanOutputs: Boolean) = {
  traverseFunctions(functionEdge => checkDone(functionEdge, cleanOutputs))
}
|
|
|
|
|
|
2010-10-19 06:11:14 +08:00
|
|
|
/**
 * Checks if an edge is done or, if it's an intermediate edge, if it can be skipped.
 * This function may modify previous edges if it discovers that the edge passed in
 * is dependent on jobs that were previously marked as skipped.
 * @param edge Edge to check to see if it's done or can be skipped.
 * @param cleanOutputs If true will delete outputs when setting edges to pending.
 */
private def checkDone(edge: FunctionEdge, cleanOutputs: Boolean) = {
  if (edge.function.isIntermediate) {
    // By default we do not need to run intermediate edges.
    // Mark any intermediate edges as skipped, if they're not already done.
    if (edge.status != RunnerStatus.DONE)
      edge.markAsSkipped()
  } else {
    val previous = this.previousFunctions(edge)
    // Done only when this edge is done and every input provider is done or skipped.
    val isDone = edge.status == RunnerStatus.DONE &&
      previous.forall(prior => prior.status == RunnerStatus.DONE || prior.status == RunnerStatus.SKIPPED)
    if (!isDone) {
      edge.resetToPending(cleanOutputs)
      resetPreviousSkipped(edge, previous, cleanOutputs)
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * From the previous edges, resets any that are marked as skipped to pending.
 * If those that are reset have skipped edges, those skipped edges are recursively also set
 * to pending.
 * @param edge Dependent edge.
 * @param previous Previous edges that provide inputs to edge.
 * @param cleanOutputs If true will clean up the output files when resetting skipped jobs to pending.
 */
private def resetPreviousSkipped(edge: FunctionEdge, previous: List[FunctionEdge], cleanOutputs: Boolean): Unit = {
  // The skipped edges are selected up front, before any of them is reset.
  val skipped = previous.filter(_.status == RunnerStatus.SKIPPED)
  skipped.foreach(skippedEdge => {
    skippedEdge.resetToPending(cleanOutputs)
    resetPreviousSkipped(skippedEdge, this.previousFunctions(skippedEdge), cleanOutputs)
  })
}
|
|
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
/**
 * Creates the runner for a function.
 *
 * Command line functions get an LsfJobRunner when settings.bsubAllJobs is set and a
 * ShellJobRunner otherwise. In-process functions get an InProcessRunner.
 *
 * @param f Function to create a runner for.
 * @return A new runner for the function.
 * @throws QException if the function is neither a command line nor an in-process function.
 */
private def newRunner(f: QFunction) = f match {
  case cmd: CommandLineFunction =>
    if (settings.bsubAllJobs) new LsfJobRunner(cmd)
    else new ShellJobRunner(cmd)
  case inProc: InProcessFunction =>
    new InProcessRunner(inProc)
  case _ =>
    throw new QException("Unexpected function: " + f)
}
|
|
|
|
|
|
2010-10-16 01:01:36 +08:00
|
|
|
/**
 * Tries to send a failure email describing the given failed functions.
 * Nothing is sent when settings.statusEmailTo is empty.
 *
 * @param failed Function edges that failed.
 */
private def emailFailedJobs(failed: List[FunctionEdge]) = {
  if (settings.statusEmailTo.size > 0) {
    val failureEmail = new EmailMessage
    failureEmail.from = settings.statusEmailFrom
    failureEmail.to = settings.statusEmailTo
    failureEmail.subject = "Queue function: Failure"
    addFailedFunctions(failureEmail, failed)
    failureEmail.trySend(settings.qSettings.emailSettings)
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Resets failed jobs to pending while they still have retry attempts left.
 *
 * Does nothing when settings.retries is not positive. Otherwise each failed job either
 * has its retry count incremented and is reset to pending with its outputs cleaned, or,
 * once the retry limit is reached, is only logged as given up on.
 *
 * @param failed Function edges that failed.
 */
private def checkRetryJobs(failed: List[FunctionEdge]) = {
  val maxRetries = settings.retries
  if (maxRetries > 0) {
    failed.foreach(failedJob => {
      if (failedJob.retries < maxRetries) {
        failedJob.retries += 1
        failedJob.resetToPending(true)
        logger.info("Reset for retry attempt %d of %d: %s".format(
          failedJob.retries, maxRetries, failedJob.function.description))
      } else {
        logger.info("Giving up after retrying %d times: %s".format(
          maxRetries, failedJob.function.description))
      }
    })
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tries to send an email summarizing the run.
 *
 * Nothing is sent when settings.statusEmailTo is empty. The body starts with the text
 * from getStatus. The subject reports success when no function edge is FAILED; otherwise
 * it reports failure and the failed functions and their logs are added to the message.
 */
private def emailStatus() = {
  if (settings.statusEmailTo.size > 0) {
    var failed = List.empty[FunctionEdge]
    foreachFunction(functionEdge => {
      if (functionEdge.status == RunnerStatus.FAILED)
        failed :+= functionEdge
    })

    val statusEmail = new EmailMessage
    statusEmail.from = settings.statusEmailFrom
    statusEmail.to = settings.statusEmailTo
    statusEmail.body = getStatus + nl
    if (failed.size == 0) {
      statusEmail.subject = "Queue run: Success"
    } else {
      statusEmail.subject = "Queue run: Failure"
      addFailedFunctions(statusEmail, failed)
    }
    statusEmail.trySend(settings.qSettings.emailSettings)
  }
}
|
|
|
|
|
|
2010-10-16 01:01:36 +08:00
|
|
|
/**
 * Appends a description of the failed functions and the paths of their log files to the
 * body of an email, and sets those log files as the email's attachments.
 *
 * @param emailMessage Message to update; a null body is treated as empty.
 * @param failed Function edges that failed.
 */
private def addFailedFunctions(emailMessage: EmailMessage, failed: List[FunctionEdge]) = {
  val logs = failed.flatMap(logFiles(_))
  val descriptions = failed.map(failedDescription(_)).mkString(nl+nl)
  val logPaths = logs.map(_.getAbsolutePath).mkString(nl)

  if (emailMessage.body == null)
    emailMessage.body = ""
  emailMessage.body += """
    |Failed functions:
    |
    |%s
    |
    |Logs:
    |%s%n
    |""".stripMargin.trim.format(descriptions, logPaths)

  emailMessage.attachments = logs
}
|
|
|
|
|
|
2010-10-23 06:22:30 +08:00
|
|
|
/**
 * Describes a failed function for use in an email.
 *
 * When retries are enabled the description is prefixed with an attempt line that counts
 * the first run as attempt one.
 *
 * @param failed Function edge that failed.
 * @return The optional attempt line followed by the function's description.
 */
private def failedDescription(failed: FunctionEdge) = {
  val text = new StringBuilder
  if (settings.retries > 0)
    text.append("Attempt %d of %d.%n".format(failed.retries + 1, settings.retries + 1))
  text.append(failed.function.description)
  text.toString
}
|
|
|
|
|
|
2010-10-14 23:58:52 +08:00
|
|
|
/**
 * Lists the log files of a function edge that exist.
 *
 * @param edge Function edge whose logs are wanted.
 * @return The job output file followed by the job error file, leaving out any that is
 *         null or does not exist.
 */
private def logFiles(edge: FunctionEdge) = {
  val candidates = List[File](edge.function.jobOutputFile, edge.function.jobErrorFile)
  candidates.filter(file => file != null && file.exists)
}
|
|
|
|
|
|
Queue now does job tracking (replace -run with -status in the command line). Produces output that looks like:
INFO 20:58:17,827 QCommandLine - Checking pipeline status
INFO 20:58:23,234 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_MergeIndels [DONE]
INFO 20:58:23,236 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_158.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,237 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_929.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,238 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_SNP_calls [NOT DONE] 5t/0d/0r/5p/0f
INFO 20:58:23,239 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_HandFilter [NOT DONE]
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1122.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantRecalibrator [NOT DONE]
INFO 20:58:23,241 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_913.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,242 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_2037.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,243 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantEval [NOT DONE]
INFO 20:58:23,244 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster [NOT DONE]
INFO 20:58:23,245 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_106.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,246 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster_and_Indel_filter [NOT DONE]
INFO 20:58:23,247 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_ApplyVariantCuts [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_GenomicAnnotator [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1713.bam [DONE] 5t/5d/0r/0p/0f
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4340 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-24 08:59:09 +08:00
|
|
|
/**
 * Tracks analysis status.
 *
 * @param analysisName Name of the analysis being tracked.
 */
private class AnalysisStatus(val analysisName: String) {
  /** Status of the analysis; starts out as PENDING. */
  var status = RunnerStatus.PENDING

  /** Counters for the scatter side. NOTE(review): presumably the scatter jobs; confirm against the code that fills it in. */
  var scatter = new ScatterGatherStatus

  /** Counters for the gather side. NOTE(review): presumably the gather jobs; confirm against the code that fills it in. */
  var gather = new ScatterGatherStatus
}
|
|
|
|
|
|
|
|
|
|
/**
|
2010-10-07 02:29:56 +08:00
|
|
|
* Tracks scatter gather status.
|
Queue now does job tracking (replace -run with -status in the command line). Produces output that looks like:
INFO 20:58:17,827 QCommandLine - Checking pipeline status
INFO 20:58:23,234 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_MergeIndels [DONE]
INFO 20:58:23,236 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_158.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,237 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_929.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,238 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_SNP_calls [NOT DONE] 5t/0d/0r/5p/0f
INFO 20:58:23,239 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_HandFilter [NOT DONE]
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1122.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantRecalibrator [NOT DONE]
INFO 20:58:23,241 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_913.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,242 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_2037.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,243 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantEval [NOT DONE]
INFO 20:58:23,244 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster [NOT DONE]
INFO 20:58:23,245 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_106.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,246 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster_and_Indel_filter [NOT DONE]
INFO 20:58:23,247 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_ApplyVariantCuts [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_GenomicAnnotator [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1713.bam [DONE] 5t/5d/0r/0p/0f
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4340 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-24 08:59:09 +08:00
|
|
|
*/
|
2010-10-07 02:29:56 +08:00
|
|
|
private class ScatterGatherStatus {
  // Mutable tallies for one side (scatter or gather) of an analysis.
  // All counters start at zero and are bumped by updateSGStatus.

  // Number of function edges counted.
  var total: Int = 0

  // Number of counted edges whose status was DONE.
  var done: Int = 0

  // Number of counted edges whose status was FAILED.
  var failed: Int = 0

  // Number of counted edges whose status was SKIPPED.
  var skipped: Int = 0
}
|
|
|
|
|
|
2010-10-14 23:58:52 +08:00
|
|
|
/**
 * Builds one string out of every status line produced by doStatus.
 * Each line is followed by the platform line separator.
 * @return The concatenated status report.
 */
private def getStatus = {
  val report = new StringBuilder
  doStatus(line => {
    report.append(line)
    report.append(nl)
  })
  report.toString
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reports the status of every named analysis, one formatted line per analysis.
 *
 * Function edges are visited through foreachFunction and grouped by the
 * analysisName of their function; edges whose analysisName is null are ignored.
 * Analyses are reported in the order in which their name was first seen.
 * @param statusFunc Callback that receives each formatted status line.
 */
private def doStatus(statusFunc: String => Unit) = {
  // Insertion-ordered map: keeps first-seen order without rescanning a list for every edge.
  val statuses = new scala.collection.mutable.LinkedHashMap[String, AnalysisStatus]
  foreachFunction(edge => {
    val name = edge.function.analysisName
    if (name != null)
      updateAnalysisStatus(statuses.getOrElseUpdate(name, new AnalysisStatus(name)), edge)
  })

  // Pad every name to the longest one. The width never drops below 1 because
  // java.util.Formatter rejects "%-0s", which an empty analysis name would otherwise produce.
  val nameWidth = statuses.keysIterator.foldLeft(1)((width, name) => width max name.length)
  val lineFormat = "%-" + nameWidth + "s [%s]"

  statuses.valuesIterator.foreach(status => {
    val sgTotal = status.scatter.total + status.gather.total

    // Analyses with scatter/gather edges get their overall status derived from the counters.
    if (sgTotal > 0) {
      val sgDone = status.scatter.done + status.gather.done
      val sgFailed = status.scatter.failed + status.gather.failed
      val sgSkipped = status.scatter.skipped + status.gather.skipped

      // NOTE(review): when no gather edges were counted, gather.done == gather.total (0 == 0)
      // reports DONE even if scatter edges are unfinished -- confirm this is intended.
      status.status =
        if (sgFailed > 0) RunnerStatus.FAILED
        else if (status.gather.done == status.gather.total) RunnerStatus.DONE
        else if (sgDone + sgSkipped == sgTotal) RunnerStatus.SKIPPED
        else if (sgDone > 0) RunnerStatus.RUNNING
        else RunnerStatus.PENDING
    }

    val summary = lineFormat.format(status.analysisName, StringUtils.center(status.status.toString, 7))

    // Per-side counters are only appended when more than one scatter/gather edge was counted.
    val detail =
      if (sgTotal > 1) formatSGStatus(status.scatter, "s") + formatSGStatus(status.gather, "g")
      else ""

    statusFunc(summary + detail)
  })
}
|
Queue now does job tracking (replace -run with -status in the command line). Produces output that looks like:
INFO 20:58:17,827 QCommandLine - Checking pipeline status
INFO 20:58:23,234 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_MergeIndels [DONE]
INFO 20:58:23,236 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_158.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,237 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_929.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,238 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_SNP_calls [NOT DONE] 5t/0d/0r/5p/0f
INFO 20:58:23,239 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_HandFilter [NOT DONE]
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1122.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantRecalibrator [NOT DONE]
INFO 20:58:23,241 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_913.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,242 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_2037.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,243 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantEval [NOT DONE]
INFO 20:58:23,244 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster [NOT DONE]
INFO 20:58:23,245 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_106.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,246 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster_and_Indel_filter [NOT DONE]
INFO 20:58:23,247 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_ApplyVariantCuts [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_GenomicAnnotator [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1713.bam [DONE] 5t/5d/0r/0p/0f
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4340 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-24 08:59:09 +08:00
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
/**
 * Folds one function edge into the status of its analysis.
 * Gather functions are tallied in stats.gather, clone functions in stats.scatter;
 * for any other function the edge status is copied onto the analysis.
 * @param stats Status of the analysis the edge belongs to.
 * @param edge Function edge to account for.
 */
private def updateAnalysisStatus(stats: AnalysisStatus, edge: FunctionEdge) = {
  edge.function match {
    case _: GatherFunction => updateSGStatus(stats.gather, edge)
    case _: CloneFunction => updateSGStatus(stats.scatter, edge)
    case _ => stats.status = edge.status
  }
}
|
Queue now does job tracking (replace -run with -status in the command line). Produces output that looks like:
INFO 20:58:17,827 QCommandLine - Checking pipeline status
INFO 20:58:23,234 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_MergeIndels [DONE]
INFO 20:58:23,236 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_158.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,237 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_929.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,238 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_SNP_calls [NOT DONE] 5t/0d/0r/5p/0f
INFO 20:58:23,239 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_HandFilter [NOT DONE]
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1122.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,240 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantRecalibrator [NOT DONE]
INFO 20:58:23,241 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_913.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,242 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_2037.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,243 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_VariantEval [NOT DONE]
INFO 20:58:23,244 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster [NOT DONE]
INFO 20:58:23,245 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_106.bam [DONE] 5t/5d/0r/0p/0f
INFO 20:58:23,246 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_Cluster_and_Indel_filter [NOT DONE]
INFO 20:58:23,247 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_ApplyVariantCuts [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - Height_Hirschhorn_NHGRI.uncleaned_GenomicAnnotator [NOT DONE]
INFO 20:58:23,248 QGraph$$anonfun$formatStatus$1 - IndelGenotyper_1713.bam [DONE] 5t/5d/0r/0p/0f
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4340 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-24 08:59:09 +08:00
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
private def updateSGStatus(stats: ScatterGatherStatus, edge: FunctionEdge) = {
  // Adds one edge to the scatter/gather tallies: the total always grows,
  // and the done/failed/skipped counter matching the edge status grows with it.
  stats.total += 1
  edge.status match {
    case RunnerStatus.DONE => stats.done += 1
    case RunnerStatus.FAILED => stats.failed += 1
    case RunnerStatus.SKIPPED => stats.skipped += 1
    /* can't tell the difference between pending and running right now! */
    case RunnerStatus.PENDING | RunnerStatus.RUNNING =>
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders scatter/gather counters as " <prefix>:<total>t/<done>d/<failed>f".
 * The skipped counter is not part of the output.
 * @param stats Counters to render.
 * @param prefix Label placed before the counters.
 * @return The formatted counters, starting with a space.
 */
private def formatSGStatus(stats: ScatterGatherStatus, prefix: String) = {
  val template = " %s:%dt/%dd/%df"
  template.format(prefix, stats.total, stats.done, stats.failed)
}
|
2010-06-23 02:39:20 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Creates an empty directed graph of QNode vertices and QEdge edges.
 * Its edge factory builds a MappingEdge between the files of the two nodes
 * whenever the graph has to create an edge itself.
 * @return A new graph
 */
private def newGraph = {
  val mappingEdgeFactory = new EdgeFactory[QNode, QEdge] {
    def createEdge(input: QNode, output: QNode) = new MappingEdge(input.files, output.files)
  }
  new SimpleDirectedGraph[QNode, QEdge](mappingEdgeFactory)
}
|
|
|
|
|
|
|
|
|
|
private def addEdge(edge: QEdge) = {
  // Inserts the edge between the node of its inputs and the node of its outputs,
  // replacing any edges that already run from the first node to the second.
  val source = QNode(edge.inputs)
  val target = QNode(edge.outputs)

  // Both vertices are added before the edge; the results are only used for debug logging.
  val sourceAdded = jobGraph.addVertex(source)
  val targetAdded = jobGraph.addVertex(target)
  val replacedEdges = jobGraph.removeAllEdges(source, target)
  jobGraph.addEdge(source, target, edge)

  if (this.debugMode) {
    logger.debug("Mapped from: " + source)
    logger.debug("Mapped to: " + target)
    logger.debug("Mapped via: " + edge)
    logger.debug("Removed edges: " + replacedEdges)
    logger.debug("New source?: " + sourceAdded)
    logger.debug("New target?: " + targetAdded)
    logger.debug("")
  }
}
|
|
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * For a set holding more than one file, adds a mapping edge from each
 * individual file to the whole set. Smaller sets are left alone.
 * @param files Set to check.
 */
private def addCollectionInputs(files: Set[File]): Unit = {
  if (files.size > 1)
    files.foreach(file => addMappingEdge(Set(file), files))
}
|
|
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * For a set holding more than one file, adds a mapping edge from the
 * whole set to each individual file. Smaller sets are left alone.
 * @param files Set to check.
 */
private def addCollectionOutputs(files: Set[File]): Unit = {
  if (files.size > 1)
    files.foreach(file => addMappingEdge(files, Set(file)))
}
|
|
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Adds a MappingEdge from the input set to the output set, unless the two sets
 * are equal or the graph already holds an edge between their nodes in either direction.
 * @param input Input set of files.
 * @param output Output set of files.
 */
private def addMappingEdge(input: Set[File], output: Set[File]) = {
  // True when the graph already has an edge running from the first set to the second.
  def linked(from: Set[File], to: Set[File]) =
    jobGraph.getEdge(QNode(from), QNode(to)) != null

  if (input != output && !linked(input, output) && !linked(output, input))
    addEdge(new MappingEdge(input, output))
}
|
|
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Tells whether an edge is a mapping edge that leads nowhere: either its target
 * node has no outgoing edges or its source node has no incoming edges.
 * Edges that are not mapping edges are never filler.
 * @param edge Edge to check.
 * @return true if the edge is not needed in the graph.
 */
private def isFiller(edge: QEdge) = {
  edge match {
    case mapping: MappingEdge =>
      jobGraph.outgoingEdgesOf(jobGraph.getEdgeTarget(mapping)).size == 0 ||
        jobGraph.incomingEdgesOf(jobGraph.getEdgeSource(mapping)).size == 0
    case _ => false
  }
}
|
2010-06-23 02:39:20 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Tells whether a node has neither incoming nor outgoing edges.
 * @param node Node (set of files) to check.
 * @return true if this set of files is not needed in the graph.
 */
private def isOrphan(node: QNode) = {
  val inbound = jobGraph.incomingEdgesOf(node)
  val outbound = jobGraph.outgoingEdgesOf(node)
  inbound.isEmpty && outbound.isEmpty
}
|
2010-07-16 06:32:48 +08:00
|
|
|
|
2010-08-12 05:58:26 +08:00
|
|
|
/**
 * Runs a callback over every FunctionEdge in the graph.
 * Edges are visited in the order defined by compare; other edge types are skipped.
 * @param f Function to run for each FunctionEdge.
 */
private def foreachFunction(f: (FunctionEdge) => Unit) = {
  val functionEdges = jobGraph.edgeSet.toList.collect {
    case functionEdge: FunctionEdge => functionEdge
  }
  functionEdges
    .sortWith((left, right) => compare(left, right))
    .foreach(edge => f(edge))
}
|
|
|
|
|
|
|
|
|
|
private def compare(f1: FunctionEdge, f2: FunctionEdge): Boolean = {
  // Edges are ordered by the functions they carry.
  val first = f1.function
  val second = f2.function
  compare(first, second)
}
|
|
|
|
|
|
|
|
|
|
private def compare(f1: QFunction, f2: QFunction): Boolean = {
  // Orders functions by comparing their addOrder sequences position by position.
  // The first position where the entries differ decides; if one sequence is a
  // prefix of the other, the shorter one sorts first. Equal sequences yield false.
  // Written as one expression so no return is needed inside a loop closure.
  val order1 = f1.addOrder
  val order2 = f2.addOrder

  // zip stops at the shorter sequence, like the original "len1 min len2" bound.
  order1.zip(order2).find { case (a, b) => a < b || a > b } match {
    case Some((a, b)) => a < b
    case None => order1.size < order2.size
  }
}
|
|
|
|
|
|
2010-10-08 03:08:02 +08:00
|
|
|
/**
 * Runs a callback for function edges while walking the graph with a
 * TopologicalOrderIterator. The callback fires from the iterator's edge-traversal
 * events; mapping edges are ignored.
 * @param f Function to run for each FunctionEdge.
 */
private def traverseFunctions(f: (FunctionEdge) => Unit) = {
  val ordered = new TopologicalOrderIterator(this.jobGraph)

  val listener = new TraversalListenerAdapter[QNode, QEdge] {
    override def edgeTraversed(event: EdgeTraversalEvent[QNode, QEdge]) = {
      event.getEdge match {
        case functionEdge: FunctionEdge => f(functionEdge)
        case map: MappingEdge => /* do nothing for mapping functions */
      }
    }
  }
  ordered.addTraversalListener(listener)

  // Drain the iterator; the vertices are not needed, only the events fired along the way.
  while (ordered.hasNext)
    ordered.next()
}
|
|
|
|
|
|
|
|
|
|
private def deleteIntermediateOutputs() = {
  // Deletes the outputs of every function flagged as intermediate, but only when
  // the settings ask for it and no job in the graph has failed.
  val shouldDelete = settings.deleteIntermediates && !hasFailed
  if (shouldDelete) {
    logger.info("Deleting intermediate files.")
    traverseFunctions(edge => {
      val function = edge.function
      if (function.isIntermediate) {
        logger.debug("Deleting intermediates:" + function.description)
        function.deleteOutputs()
      }
    })
  }
}
|
2010-10-08 03:08:02 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * Outputs the graph to a .dot file.
 * http://en.wikipedia.org/wiki/DOT_language
 *
 * Vertices are named with integers; edges are labelled with their dotString,
 * with double quotes escaped, or with an empty label when dotString is null.
 * The writer is closed even when the export fails.
 * @param file Path to output the .dot file.
 */
private def renderToDot(file: java.io.File) = {
  // todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
  // todo -- visualizeString() command, or something that shows inputs / outputs
  val edgeNames = new org.jgrapht.ext.EdgeNameProvider[QEdge] {
    def getEdgeName(function: QEdge) = if (function.dotString == null) "" else function.dotString.replace("\"", "\\\"")
  }
  val exporter = new DOTExporter(new org.jgrapht.ext.IntegerNameProvider[QNode](), null, edgeNames)

  val out = new java.io.FileWriter(file)
  try {
    exporter.export(out, jobGraph)
  } finally {
    // Always release the file handle, including when export throws.
    out.close()
  }
}
|
2010-10-07 02:29:56 +08:00
|
|
|
|
|
|
|
|
/**
 * Tells whether any function edge in the graph has a status of failed.
 * Always false during a dry run.
 * @return true if any of the jobs in the graph have a status of failed.
 */
def hasFailed = {
  !this.dryRun && this.jobGraph.edgeSet.exists {
    case functionEdge: FunctionEdge => functionEdge.status == RunnerStatus.FAILED
    case _ => false
  }
}
|
|
|
|
|
|
2010-10-19 06:11:14 +08:00
|
|
|
def logFailed = {
  // Passes every function edge whose status is FAILED to logEdge.
  def failed(edge: FunctionEdge) = edge.status == RunnerStatus.FAILED
  foreachFunction(edge => if (failed(edge)) logEdge(edge))
}
|
|
|
|
|
|
2010-10-07 02:29:56 +08:00
|
|
|
/**
 * Kills any forked jobs still running.
 *
 * Flags the graph as shutting down, then takes the LSF runners among the running
 * jobs, keeps those whose job has a bsub job id, and kills them in batches of ten.
 * Kill failures are logged; failures while removing temporary files are ignored.
 */
def shutdown(): Unit = {
  shuttingDown = true

  val lsfJobRunners = getRunningJobs.map(_.runner).collect { case lsf: LsfJobRunner => lsf }
  if (!lsfJobRunners.isEmpty) {
    val killable = lsfJobRunners.filterNot(_.job.bsubJobId == null)
    for (batch <- killable.grouped(10)) {
      try {
        val bkill = new LsfKillJob(batch.map(_.job))
        logger.info(bkill.command)
        bkill.run()
      } catch {
        case jee: JobExitException =>
          logger.error("Unable to kill all jobs:%n%s".format(jee.getMessage))
        case e: Throwable =>
          logger.error("Unable to kill jobs.", e)
      }

      // Best effort: cleanup is attempted even when the kill above failed.
      try {
        batch.foreach(_.removeTemporaryFiles())
      } catch {
        case _: Throwable => /* ignore */
      }
    }
  }
}
|
2010-06-15 12:43:46 +08:00
|
|
|
}
|