2010-06-15 12:43:46 +08:00
|
|
|
package org.broadinstitute.sting.queue.engine
|
|
|
|
|
|
2010-07-16 06:32:48 +08:00
|
|
|
import org.jgrapht.traverse.TopologicalOrderIterator
|
2010-06-15 12:43:46 +08:00
|
|
|
import org.jgrapht.graph.SimpleDirectedGraph
|
|
|
|
|
import scala.collection.JavaConversions
|
2010-06-23 02:39:20 +08:00
|
|
|
import scala.collection.JavaConversions._
|
2010-06-15 12:43:46 +08:00
|
|
|
import org.broadinstitute.sting.queue.function.{MappingFunction, CommandLineFunction, QFunction}
|
2010-06-23 02:39:20 +08:00
|
|
|
import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
|
2010-08-10 00:42:48 +08:00
|
|
|
import org.broadinstitute.sting.queue.util.Logging
|
2010-06-23 02:39:20 +08:00
|
|
|
import org.broadinstitute.sting.queue.QException
|
|
|
|
|
import org.jgrapht.alg.CycleDetector
|
|
|
|
|
import org.jgrapht.EdgeFactory
|
2010-07-16 06:32:48 +08:00
|
|
|
import org.jgrapht.ext.DOTExporter
|
2010-08-10 00:42:48 +08:00
|
|
|
import java.io.File
|
2010-06-15 12:43:46 +08:00
|
|
|
|
2010-08-10 00:42:48 +08:00
|
|
|
/**
 * The internal dependency tracker between sets of function input and output files.
 *
 * Vertices of the underlying graph are QNode instances (each wrapping a set of files)
 * and edges are the QFunction instances that map an input set of files to an output set.
 */
class QGraph extends Logging {
  /**
   * When true, a successful run() is reported as a dry run. The job scheduler is also
   * invoked in dry run mode even if validation of the graph failed.
   */
  var dryRun = true

  /** When true, scatter-gatherable functions are split into their parts as they are added. */
  var bsubAllJobs = false

  // NOTE(review): not read inside this class; presumably consumed by the job runner -- confirm.
  var bsubWaitJobs = false

  /** The directed graph of file sets (vertices) connected by functions (edges). */
  val jobGraph = newGraph

  /**
   * Counts the edges in the graph that are CommandLineFunctions.
   * @return Number of CommandLineFunction edges currently in the graph.
   */
  def numJobs: Int =
    JavaConversions.asSet(jobGraph.edgeSet).count(_.isInstanceOf[CommandLineFunction])

  /**
   * Adds a QScript created CommandLineFunction to the graph.
   * @param command Function to add to the graph.
   */
  def add(command: CommandLineFunction): Unit = {
    addFunction(command)
  }

  /**
   * Looks through functions with multiple inputs and outputs and adds mapping functions for single inputs and outputs.
   * Afterwards repeatedly removes mapping edges that are not needed (see isFiller) until none remain,
   * and finally removes the vertices that are no longer connected to any edge.
   * @return The result of removing the orphaned vertices from the graph.
   */
  def fillIn = {
    // clone since edgeSet is backed by the graph
    for (function <- JavaConversions.asSet(jobGraph.edgeSet).clone) {
      addCollectionOutputs(function.outputs)
      addCollectionInputs(function.inputs)
    }

    // Removing one filler edge may turn a neighboring mapping edge into filler,
    // so keep pruning until a pass finds nothing to remove.
    var pruning = true
    while (pruning) {
      pruning = false
      val filler = jobGraph.edgeSet.filter(isFiller(_))
      if (filler.size > 0) {
        jobGraph.removeAllEdges(filler)
        pruning = true
      }
    }

    jobGraph.removeAllVertices(jobGraph.vertexSet.filter(isOrphan(_)))
  }

  /**
   * Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph.
   * The scheduler is invoked when the graph passed both checks, or unconditionally when dryRun is set.
   */
  def run = {
    // Validation order matters for the log output: missing values first, then cycles.
    val totalMissingValues = logMissingValues()
    val hasCycles = logCycles()
    val isReady = totalMissingValues == 0 && !hasCycles

    if (isReady || this.dryRun)
      (new TopologicalJobScheduler(this) with LsfJobRunner).runJobs

    // Summarize after the scheduler output so the total is the last error logged.
    if (totalMissingValues > 0) {
      logger.error("Total missing values: " + totalMissingValues)
    }

    if (isReady && this.dryRun) {
      logger.info("Dry run completed successfully!")
      logger.info("Re-run with \"-run\" to execute the functions.")
    }
  }

  /**
   * Logs every missing field value of each CommandLineFunction in the graph.
   * @return The total number of missing values across all CommandLineFunctions.
   */
  private def logMissingValues(): Int = {
    var totalMissingValues = 0
    for (function <- JavaConversions.asSet(jobGraph.edgeSet)) {
      function match {
        case cmd: CommandLineFunction =>
          val missingFieldValues = cmd.missingFields
          if (missingFieldValues.size > 0) {
            totalMissingValues += missingFieldValues.size
            logger.error("Missing %s values for function: %s".format(missingFieldValues.size, cmd.commandLine))
            for (missing <- missingFieldValues)
              logger.error(" " + missing)
          }
        case _ =>
          // Only command line functions are checked for missing values.
      }
    }
    totalMissingValues
  }

  /**
   * Logs the cycles found in the graph, if any.
   * @return true if the graph contains at least one cycle.
   */
  private def logCycles(): Boolean = {
    val detector = new CycleDetector(jobGraph)
    val hasCycles = detector.detectCycles
    if (hasCycles) {
      logger.error("Cycles were detected in the graph:")
      for (cycle <- detector.findCycles)
        logger.error(" " + cycle)
    }
    hasCycles
  }

  /**
   * Creates a new graph where if new edges are needed (for cyclic dependency checking) they can be automatically created using a generic MappingFunction.
   * @return A new graph
   */
  private def newGraph = new SimpleDirectedGraph[QNode, QFunction](new EdgeFactory[QNode, QFunction] {
    def createEdge(input: QNode, output: QNode) = new MappingFunction(input.files, output.files)
  })

  /**
   * Adds a generic QFunction to the graph.
   * If the function is scatterable and the jobs request bsub, splits the job into parts and adds the parts instead.
   * Otherwise any existing edges between the function's input and output nodes are replaced by the function.
   * @param f Generic QFunction to add to the graph.
   * @throws QException wrapping any exception raised while adding the function.
   */
  private def addFunction(f: QFunction): Unit = {
    try {
      f.freeze

      f match {
        case scatterGather: ScatterGatherableFunction if (bsubAllJobs && scatterGather.scatterGatherable) =>
          val functions = scatterGather.generateFunctions()
          if (logger.isTraceEnabled)
            logger.trace("Scattered into %d parts: %s".format(functions.size, functions))
          functions.foreach(addFunction(_))
        case _ =>
          val inputs = QNode(f.inputs)
          val outputs = QNode(f.outputs)
          val newSource = jobGraph.addVertex(inputs)
          val newTarget = jobGraph.addVertex(outputs)
          // Drop whatever edges previously connected the two nodes so that f replaces them.
          val removedEdges = jobGraph.removeAllEdges(inputs, outputs)
          jobGraph.addEdge(inputs, outputs, f)
          if (logger.isTraceEnabled) {
            logger.trace("Mapped from: " + inputs)
            logger.trace("Mapped to: " + outputs)
            logger.trace("Mapped via: " + f)
            logger.trace("Removed edges: " + removedEdges)
            logger.trace("New source?: " + newSource)
            logger.trace("New target?: " + newTarget)
            logger.trace("")
          }
      }
    } catch {
      case e: Exception =>
        throw new QException("Error adding function: " + f, e)
    }
  }

  /**
   * Checks to see if the set of files has more than one file and if so adds input mappings between the set and the individual files.
   * @param files Set to check.
   */
  private def addCollectionInputs(files: Set[File]): Unit = {
    if (files.size > 1)
      for (file <- files)
        addMappingEdge(Set(file), files)
  }

  /**
   * Checks to see if the set of files has more than one file and if so adds output mappings between the individual files and the set.
   * @param files Set to check.
   */
  private def addCollectionOutputs(files: Set[File]): Unit = {
    if (files.size > 1)
      for (file <- files)
        addMappingEdge(files, Set(file))
  }

  /**
   * Adds a directed graph edge between the input set and the output set if there isn't a direct relationship between the two nodes already.
   * An edge in either direction counts as an existing relationship, as does the input being equal to the output.
   * @param input Input set of files.
   * @param output Output set of files.
   */
  private def addMappingEdge(input: Set[File], output: Set[File]) = {
    val hasEdge = input == output ||
      jobGraph.getEdge(QNode(input), QNode(output)) != null ||
      jobGraph.getEdge(QNode(output), QNode(input)) != null
    if (!hasEdge)
      addFunction(new MappingFunction(input, output))
  }

  /**
   * Returns true if the edge is an internal mapping edge.
   * @param edge Edge to check.
   * @return true if the edge is an internal mapping edge.
   */
  private def isMappingEdge(edge: QFunction) =
    edge.isInstanceOf[MappingFunction]

  /**
   * Returns true if the edge is mapping edge that is not needed because it does
   * not direct input or output from a user generated CommandLineFunction.
   * A mapping edge is filler when its target has no outgoing edges or its source has no incoming edges.
   * @param edge Edge to check.
   * @return true if the edge is not needed in the graph.
   */
  private def isFiller(edge: QFunction) =
    isMappingEdge(edge) &&
      (jobGraph.outgoingEdgesOf(jobGraph.getEdgeTarget(edge)).isEmpty ||
        jobGraph.incomingEdgesOf(jobGraph.getEdgeSource(edge)).isEmpty)

  /**
   * Returns true if the node is not connected to any edges.
   * @param node Node (set of files) to check
   * @return true if this set of files is not needed in the graph.
   */
  private def isOrphan(node: QNode) =
    jobGraph.incomingEdgesOf(node).isEmpty && jobGraph.outgoingEdgesOf(node).isEmpty

  /**
   * Outputs the graph to a .dot file.
   * http://en.wikipedia.org/wiki/DOT_language
   * The writer is always closed, even if exporting the graph fails.
   * @param file Path to output the .dot file.
   */
  def renderToDot(file: java.io.File) = {
    val out = new java.io.FileWriter(file)
    try {
      // todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
      // todo -- visualizeString() command, or something that shows inputs / outputs
      val ve = new org.jgrapht.ext.EdgeNameProvider[QFunction] {
        def getEdgeName( function: QFunction ) = function.dotString
      }

      (new DOTExporter(new org.jgrapht.ext.IntegerNameProvider[QNode](), null, ve)).export(out, jobGraph)
    } finally {
      // Release the file handle on both the success and the failure path.
      out.close()
    }
  }
}
|