Merge pull request #1172 from broadinstitute/ks_jd_parallel_shell

ParallelShell added as a new JobRunner
This commit is contained in:
kshakir 2015-09-29 20:29:41 -03:00
commit a762dcb6cf
7 changed files with 367 additions and 3 deletions

View File

@ -149,4 +149,23 @@ class HelloWorldQueueTest {
spec.expectedFilePaths = Seq("pipelineLogDir/HelloWorld-1.out")
QueueTest.executeTest(spec)
}
@Test(timeOut=36000000)
def testHelloWorldParallelShell() {
val spec = new QueueTestSpec
spec.name = "HelloWorldWithLogDirectory"
spec.args = "-S " + QueueTest.publicQScriptsPackageDir + "examples/HelloWorld.scala"
spec.jobRunners = Seq("ParallelShell")
QueueTest.executeTest(spec)
}
@Test(timeOut=36000000)
def testHelloWorldParallelShellMaxConcurrentRun() {
val spec = new QueueTestSpec
spec.name = "HelloWorldWithLogDirectory"
spec.args = "-S " + QueueTest.publicQScriptsPackageDir + "examples/HelloWorld.scala" +
" -maxConcurrentRun 10"
spec.jobRunners = Seq("ParallelShell")
QueueTest.executeTest(spec)
}
}

View File

@ -434,7 +434,20 @@ class QGraph extends Logging {
var doneJobs = Set.empty[FunctionEdge]
var failedJobs = Set.empty[FunctionEdge]
while (running && readyJobs.size > 0 && !readyRunningCheck(lastRunningCheck)) {
def startJobs: Boolean = {
def canRunMoreConcurrentJobs: Boolean =
if(settings.maximumNumberOfConcurrentJobs.isDefined)
runningJobs.size + startedJobs.size < settings.maximumNumberOfConcurrentJobs.get
else
true
running && readyJobs.size > 0 &&
!readyRunningCheck(lastRunningCheck) &&
canRunMoreConcurrentJobs
}
while (startJobs) {
val edge = readyJobs.head
edge.runner = newRunner(edge.function)
edge.start()

View File

@ -28,7 +28,7 @@ package org.broadinstitute.gatk.queue.engine
import java.io.File
import org.broadinstitute.gatk.queue.QSettings
import org.broadinstitute.gatk.queue.util.{EmailSettings, SystemUtils}
import org.broadinstitute.gatk.utils.commandline.{Advanced, ArgumentCollection, Argument}
import org.broadinstitute.gatk.utils.commandline.{ClassType, Advanced, ArgumentCollection, Argument}
/**
* Command line options for a QGraph.
@ -80,6 +80,11 @@ class QGraphSettings {
@Argument(fullName="disableJobReport", shortName="disableJobReport", doc="If provided, we will not create a job report", required=false)
var disableJobReport: Boolean = false
@Advanced
@ClassType(classOf[Int])
@Argument(fullName="maximumNumberOfJobsToRunConcurrently", shortName="maxConcurrentRun", doc="The maximum number of jobs to start at any given time. (Default is no limit)", required=false)
var maximumNumberOfConcurrentJobs: Option[Int] = None
@ArgumentCollection
val emailSettings = new EmailSettings

View File

@ -0,0 +1,70 @@
/*
* Copyright 2012-2015 Broad Institute, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.queue.engine.parallelshell
import org.broadinstitute.gatk.queue.function.CommandLineFunction
import org.broadinstitute.gatk.queue.engine.CommandLineJobManager
/**
* Runs multiple jobs locally without blocking.
* Use this with care as it might not be the most efficient way to run things.
* However, for some scenarios, such as running multiple single threaded
* programs concurrently it can be quite useful.
*
* All this code is based on the normal shell runner in GATK Queue and all
* credits for everything except the concurrency part goes to the GATK team.
*
* @author Johan Dahlberg
*
*/
class ParallelShellJobManager extends CommandLineJobManager[ParallelShellJobRunner] {
def runnerType = classOf[ParallelShellJobRunner]
/**
* Create new ParallelShellJobRunner
* @param function Function for the runner.
* @return a new ParallelShellJobRunner instance
*/
def create(function: CommandLineFunction) =
new ParallelShellJobRunner(function)
/**
* Update the status of the specified jobrunners.
* @param runners Runners to update.
* @return runners which were updated.
*/
override def updateStatus(
runners: Set[ParallelShellJobRunner]): Set[ParallelShellJobRunner] =
runners.filter { runner => runner.updateJobStatus() }
/**
* Stop the specified runners.
* @param runners Runners to stop.
*/
override def tryStop(runners: Set[ParallelShellJobRunner]) =
runners.foreach(_.tryStop())
}

View File

@ -0,0 +1,151 @@
/*
* Copyright 2012-2015 Broad Institute, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.queue.engine.parallelshell
import org.broadinstitute.gatk.queue.function.CommandLineFunction
import org.broadinstitute.gatk.queue.engine.{ RunnerStatus, CommandLineJobRunner }
import java.util.Date
import org.broadinstitute.gatk.utils.Utils
import org.broadinstitute.gatk.utils.runtime.{ ProcessSettings, OutputStreamSettings }
import scala.concurrent._
import ExecutionContext.Implicits.global
import scala.util.{ Success, Failure }
import org.broadinstitute.gatk.queue.util.Logging
/**
* Runs multiple jobs locally without blocking.
* Use this with care as it might not be the most efficient way to run things.
* However, for some scenarios, such as running multiple single threaded
* programs concurrently it can be quite useful.
*
* All this code is based on the normal shell runner in GATK Queue and all
* credits for everything except the concurrency part goes to the GATK team.
*
* @author Johan Dahlberg - 20150611
*
* @param function Command to run.
*/
class ParallelShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRunner with Logging {
// Controller on the thread that started the job
val controller: ThreadSafeProcessController = new ThreadSafeProcessController()
// Once the application exits this promise will be fulfilled.
val finalExitStatus = Promise[Int]()
/**
* Runs the function on the local shell.
*/
def start() {
val commandLine = Array("sh", jobScript.getAbsolutePath)
val stdoutSettings = new OutputStreamSettings
val stderrSettings = new OutputStreamSettings
val mergeError = function.jobErrorFile == null
stdoutSettings.setOutputFile(function.jobOutputFile, true)
if (function.jobErrorFile != null)
stderrSettings.setOutputFile(function.jobErrorFile, true)
if (logger.isDebugEnabled) {
stdoutSettings.printStandard(true)
stderrSettings.printStandard(true)
}
val processSettings = new ProcessSettings(
commandLine, mergeError, function.commandDirectory, null,
null, stdoutSettings, stderrSettings)
updateJobRun(processSettings)
getRunInfo.startTime = new Date()
getRunInfo.exechosts = Utils.resolveHostname()
updateStatus(RunnerStatus.RUNNING)
// Run the command line process in a future.
val executedFuture =
future { controller.exec(processSettings) }
// Register a callback on the completion of the future, making sure that
// the status of the job is updated accordingly.
executedFuture.onComplete {
case Success(exitStatus) =>
logger.debug(commandLine.mkString(" ") + " :: Got return on exit status in future: " + exitStatus)
finalExitStatus.success(exitStatus)
getRunInfo.doneTime = new Date()
exitStatusUpdateJobRunnerStatus(exitStatus)
case Failure(throwable) =>
logger.debug(
"Failed in return from run with: " +
throwable.getClass.getCanonicalName + " :: " +
throwable.getMessage)
finalExitStatus.failure(throwable)
getRunInfo.doneTime = new Date()
updateStatus(RunnerStatus.FAILED)
}
}
/**
* Possibly invoked from a shutdown thread, find and
* stop the controller from the originating thread
*/
def tryStop() = {
try {
controller.tryDestroy()
} catch {
case e: Exception =>
logger.error("Unable to kill shell job: " + function.description, e)
}
}
/**
* Update the status of the runner based on the exit status
* of the process.
*/
def exitStatusUpdateJobRunnerStatus(exitStatus: Int): Unit = {
exitStatus match {
case 0 => updateStatus(RunnerStatus.DONE)
case _ => updateStatus(RunnerStatus.FAILED)
}
}
/**
* Attempts to get the status of a job by looking at if the finalExitStatus
* promise has completed or not.
* @return if the jobRunner has updated it's status or not.
*/
def updateJobStatus(): Boolean = {
if (finalExitStatus.isCompleted) {
val completedExitStatus = finalExitStatus.future.value.get.get
exitStatusUpdateJobRunnerStatus(completedExitStatus)
true
} else {
// Make sure the status is update here, otherwise Queue will think
// it's lots control over the job and kill it after 5 minutes.
updateStatus(status)
false
}
}
}

View File

@ -0,0 +1,106 @@
/*
* Copyright 2012-2015 Broad Institute, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.queue.engine.parallelshell
import java.io.PrintWriter
import org.broadinstitute.gatk.queue.util.Logging
import org.broadinstitute.gatk.utils.runtime.ProcessSettings
import scala.sys.process._
/**
*
*/
class ThreadSafeProcessController extends Logging {
private var process: Option[Process] = None
/**
* Construct a process logger writing the stdout and stderr of the
* process controlled by this instance to the files specified in
* the provided ProcessSettings instance.
* @param processSettings specifiying which files to write to
* @return a process logger which can be used by the `scala.sys.process`
*/
private def getProcessLogger(processSettings: ProcessSettings): ProcessLogger = {
val (stdOutFile, stdErrFile) = {
val stdOutFile = processSettings.getStdoutSettings.getOutputFile
if(processSettings.getStderrSettings.getOutputFile != null) {
val stdErrFile = processSettings.getStderrSettings.getOutputFile
(stdOutFile, stdErrFile)
} else {
(stdOutFile, stdOutFile)
}
}
val stdOutPrintWriter = new PrintWriter(stdOutFile)
val stdErrPrintWriter = new PrintWriter(stdErrFile)
def printToWriter(printWriter: PrintWriter)(line: String): Unit = {
printWriter.println(line)
printWriter.flush()
}
val stringStdOutPrinterFunc = printToWriter(stdOutPrintWriter) _
val stringStdErrPrinterFunc = printToWriter(stdErrPrintWriter) _
val processLogger = ProcessLogger(
stringStdOutPrinterFunc,
stringStdErrPrinterFunc
)
processLogger
}
/**
* Execute the process specified in process settings
* @param processSettings specifying the commandline to run.
* @return the exit status of the process.
*/
def exec(processSettings: ProcessSettings): Int = {
val commandLine: ProcessBuilder = processSettings.getCommand.mkString(" ")
logger.debug("Trying to start process: " + commandLine)
process = Some(commandLine.run(getProcessLogger(processSettings)))
process.get.exitValue()
}
/**
* Attempt to destroy the underlying process.
*/
def tryDestroy(): Unit = {
logger.debug("Trying to kill process")
process.getOrElse {
throw new IllegalStateException("Tried to kill unstarted job.")
}.destroy()
}
}

View File

@ -53,7 +53,7 @@ object QueueTest extends BaseTest with Logging {
/**
* All the job runners configured to run QueueTests at The Broad.
*/
final val allJobRunners = Seq("GridEngine", "Shell")
final val allJobRunners = Seq("GridEngine", "Shell", "ParallelShell")
/**
* The default job runners to run.