When generating the QGraph, skip regeneration if there are no scatter/gather jobs.

Fixed the error message that reports how long Queue has been unable to read a job's LSF status: the elapsed time is tracked in milliseconds and is now converted to minutes correctly.
This commit is contained in:
Khalid Shakir 2011-07-11 19:17:58 -04:00
parent b327fa3779
commit e93052a51e
2 changed files with 26 additions and 24 deletions

View File

@ -138,8 +138,9 @@ class QGraph extends Logging {
validate() validate()
if (running && numMissingValues == 0) { if (running && numMissingValues == 0) {
logger.info("Generating scatter gather jobs.")
val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge)) val scatterGathers = jobGraph.edgeSet.filter(edge => scatterGatherable(edge))
if (!scatterGathers.isEmpty) {
logger.info("Generating scatter gather jobs.")
var addedFunctions = List.empty[QFunction] var addedFunctions = List.empty[QFunction]
for (scatterGather <- scatterGathers) { for (scatterGather <- scatterGathers) {
@ -164,6 +165,7 @@ class QGraph extends Logging {
validate() validate()
} }
} }
}
private def scatterGatherable(edge: QEdge) = { private def scatterGatherable(edge: QEdge) = {
edge match { edge match {

View File

@ -286,11 +286,11 @@ object Lsf706JobRunner extends Logging {
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct) // LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist) // LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId)) logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId))
val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate) val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate)
if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) { if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
// Unknown status has been returned for a while now. // Unknown status has been returned for a while now.
runner.updateStatus(RunnerStatus.FAILED) runner.updateStatus(RunnerStatus.FAILED)
logger.error("Unable to read LSF status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description)) logger.error("Unable to read LSF status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description))
} }
} }