Merge branch 'master' into NoDistributedGATK

This commit is contained in:
Mark DePristo 2011-07-23 20:04:50 -04:00
commit 1d3bcce2c4
26 changed files with 238 additions and 190 deletions

View File

@ -957,6 +957,12 @@
<!-- Unzip all classes from their current locations and assemble them in a staging directory -->
<target name="stage" description="stage files for distribution">
<mkdir dir="staging"/>
<!--
HACK: Create the lowercase "edu" directory before anything unpacks "EDU" on
case-insensitive Mac filesystems. The ivy dependency colt -> concurrent contains
an EDU.oswego package, which BCEL doesn't even pull in, but which otherwise
messes up edu.mit.broad.
-->
<mkdir dir="staging/edu"/>
<unjar dest="staging" overwrite="false">
<fileset dir="${dist.dir}">
<patternset refid="dependency.mask" />

View File

@ -1,5 +1,5 @@
#!/bin/sh
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa"
export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
export TARGET_LIB="libbwa.so"
export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"

View File

@ -1,5 +1,6 @@
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "bwase.h"
#include "bwa_gateway.h"
@ -27,6 +28,9 @@ BWA::BWA(const char* ann_filename,
bwt_restore_sa(reverse_sa_filename, bwts[1]);
load_default_options();
// Always reinitialize the random seed whenever a new set of files is loaded.
initialize_random_seed();
// initialize the bwase subsystem
bwase_initialize();
}
@ -207,6 +211,11 @@ void BWA::load_default_options()
options.trim_qual = 0;
}
// Seeds the srand48() random number generator with the seed value stored in bns.
void BWA::initialize_random_seed()
{
srand48(bns->seed);
}
void BWA::set_max_edit_distance(float edit_distance) {
if(edit_distance > 0 && edit_distance < 1) {
options.fnr = edit_distance;

View File

@ -37,6 +37,7 @@ class BWA {
gap_opt_t options;
void load_default_options();
void initialize_random_seed();
bwa_seq_t* create_sequence(const char* bases, const unsigned read_length);
void copy_bases_into_sequence(bwa_seq_t* sequence, const char* bases, const unsigned read_length);
Alignment generate_final_alignment_from_sequence(bwa_seq_t* sequence);

Binary file not shown.

View File

@ -391,7 +391,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (DEBUG)
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
//System.out.println(eventLength);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, hsize, numPrefBases);
@ -418,8 +418,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is lexically ordered, for example
// for 3 alleles it's 00 01 02 11 12 22
// which genotype likelihoods correspond to two most likely alleles? By convention, the likelihood vector is ordered so that,
// for example, for 3 alleles it's 00 01 11 02 12 22
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,

View File

@ -1042,8 +1042,8 @@ public class PairHMMIndelErrorModel {
int k=0;
double maxElement = Double.NEGATIVE_INFINITY;
for (int i=0; i < hSize; i++) {
for (int j=i; j < hSize; j++){
for (int j=0; j < hSize; j++) {
for (int i=0; i <= j; i++){
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
if (haplotypeLikehoodMatrix[i][j] > maxElement)
maxElement = haplotypeLikehoodMatrix[i][j];

View File

@ -233,6 +233,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
lowerRepeats();
} else {
lowerNonUniqueSegments();
aligner.close();
}
print();
}

View File

@ -14,7 +14,7 @@ import java.util.Arrays;
public class AlignerIntegrationTest extends WalkerTest {
@Test
public void testBasicAlignment() {
String md5 = "34eb4323742999d6d250a0aaa803c6d5";
String md5 = "a2bdf907b18114a86ca47f9fc23791bf";
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + GATKDataLocation + "bwa/human_b36_both.fasta" +
" -T Align" +

View File

@ -14,7 +14,7 @@ import java.util.Arrays;
*/
public class ValidationAmpliconsIntegrationTest extends WalkerTest {
@Test
@Test(enabled=true)
public void testWikiExample() {
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
@ -27,7 +27,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
executeTest("Test probes", spec);
}
@Test
@Test(enabled=true)
public void testWikiExampleNoBWA() {
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";
@ -40,7 +40,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
executeTest("Test probes", spec);
}
@Test
@Test(enabled=true)
public void testWikiExampleMonoFilter() {
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf";

View File

@ -34,7 +34,6 @@ import org.testng.annotations.Test;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.*;
import javax.jws.soap.SOAPBinding;
import java.io.File;
/**
@ -55,25 +54,25 @@ public class LibBatIntegrationTest extends BaseTest {
@Test
public void testReadConfEnv() {
LibLsf.config_param[] unitsParam = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4);
LibLsf.config_param[] configParams = (LibLsf.config_param[]) new LibLsf.config_param().toArray(4);
unitsParam[0].paramName = "LSF_UNIT_FOR_LIMITS";
unitsParam[1].paramName = "LSF_CONFDIR";
unitsParam[2].paramName = "MADE_UP_PARAMETER";
configParams[0].paramName = "LSF_UNIT_FOR_LIMITS";
configParams[1].paramName = "LSF_CONFDIR";
configParams[2].paramName = "MADE_UP_PARAMETER";
Structure.autoWrite(unitsParam);
Structure.autoWrite(configParams);
if (LibLsf.ls_readconfenv(unitsParam[0], null) != 0) {
if (LibLsf.ls_readconfenv(configParams[0], null) != 0) {
Assert.fail(LibLsf.ls_sysmsg());
}
Structure.autoRead(unitsParam);
Structure.autoRead(configParams);
System.out.println("LSF_UNIT_FOR_LIMITS: " + unitsParam[0].paramValue);
Assert.assertNotNull(unitsParam[1].paramValue);
Assert.assertNull(unitsParam[2].paramValue);
Assert.assertNull(unitsParam[3].paramName);
Assert.assertNull(unitsParam[3].paramValue);
System.out.println("LSF_UNIT_FOR_LIMITS: " + configParams[0].paramValue);
Assert.assertNotNull(configParams[1].paramValue);
Assert.assertNull(configParams[2].paramValue);
Assert.assertNull(configParams[3].paramName);
Assert.assertNull(configParams[3].paramValue);
}
@Test

View File

@ -41,6 +41,7 @@
<!-- Picard -->
<package name="net.sf.picard.**" />
<package name="edu.mit.broad.picard.**" />
<!-- JavaMail -->
<package name="javax.activation.**" />

View File

@ -15,8 +15,8 @@ class GATKResourcesBundle extends QScript {
@Argument(doc="liftOverPerl", required=false)
var liftOverPerl: File = new File("./perl/liftOverVCF.pl")
@Argument(shortName = "svn", doc="The SVN version of this release", required=true)
var SVN_VERSION: String = _
@Argument(shortName = "ver", doc="The SVN version of this release", required=true)
var VERSION: String = _
@Argument(shortName = "bundleDir", doc="Path to root where resource files will be placed", required=false)
val BUNDLE_ROOT = new File("/humgen/gsa-hpprojects/GATK/bundle")
@ -32,8 +32,8 @@ class GATKResourcesBundle extends QScript {
val SITES_EXT: String = "sites"
def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + SVN_VERSION
def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + SVN_VERSION
def BUNDLE_DIR: File = BUNDLE_ROOT + "/" + VERSION
def DOWNLOAD_DIR: File = DOWNLOAD_ROOT + "/" + VERSION
// REFERENCES
class Reference( val name: String, val file: File ) { }
@ -113,6 +113,12 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource(hg19.file, "", hg19, false))
addResource(new Resource(hg18.file, "", hg18, false))
//
// The b37_decoy reference
//
addResource(new Resource("/humgen/1kg/reference/human_g1k_v37_decoy.fasta",
"IGNORE", b37, false, false))
//
// standard VCF files. Will be lifted to each reference
//

View File

@ -42,8 +42,8 @@ class RecalibrateBaseQualities extends QScript {
val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
val recalBam: File = swapExt(bam, ".bam", ".recal.bam")
val path1: String = bam + ".before"
val path2: String = bam + ".after"
val path1: String = recalBam + ".before"
val path2: String = recalBam + ".after"
add(cov(bam, recalFile1),
recal(bam, recalFile1, recalBam),

View File

@ -45,7 +45,7 @@ class QSettings {
var jobPriority: Option[Int] = None
@Argument(fullName="default_memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
var memoryLimit: Option[Int] = None
var memoryLimit: Option[Double] = None
@Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false)
var runDirectory = new File(".")

View File

@ -33,12 +33,29 @@ import org.broadinstitute.sting.queue.util.{Logging, IOUtils}
*/
trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging {
/** The string representation of the identifier of the running job. */
def jobIdString: String = null
/** A generated exec shell script. */
protected var jobScript: File = _
/** Which directory to use for the job status files. */
protected def jobStatusDir = function.jobTempDir
/** Amount of time a job can go without status before giving up. */
private val unknownStatusMaxSeconds = 5 * 60
/** Last known status */
protected var lastStatus: RunnerStatus.Value = _
/** The last time the status was updated */
protected var lastStatusUpdate: Long = _
final override def status = this.lastStatus
/** The resident memory to request in MB: the function's memoryLimit (gigabytes) times 1024, or None when no limit is set. */
def residentRequestMB: Option[Double] = function.memoryLimit.map(_ * 1024)
/** The resident memory cap in MB: 1.2 times residentRequestMB, or None when there is no request. */
def residentLimitMB: Option[Double] = residentRequestMB.map( _ * 1.2 )
override def init() {
super.init()
var exec = new StringBuilder
@ -53,7 +70,21 @@ trait CommandLineJobRunner extends JobRunner[CommandLineFunction] with Logging {
}
exec.append(function.commandLine)
this.jobScript = IOUtils.writeTempFile(exec.toString, ".exec", "", jobStatusDir)
this.jobScript = IOUtils.writeTempFile(exec.toString(), ".exec", "", jobStatusDir)
}
/**
 * Records the latest known status together with the time it was recorded.
 * @param updatedStatus The new status of the job.
 */
protected def updateStatus(updatedStatus: RunnerStatus.Value) {
this.lastStatus = updatedStatus
// Timestamp the update; checkUnknownStatus() measures elapsed time from this value.
this.lastStatusUpdate = System.currentTimeMillis
}
/**
 * Marks the job as FAILED and logs an error when no status update has been
 * recorded for longer than unknownStatusMaxSeconds.
 */
override def checkUnknownStatus() {
val unknownStatusMillis = (System.currentTimeMillis - lastStatusUpdate)
if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
// Unknown status has been returned for a while now.
updateStatus(RunnerStatus.FAILED)
// "%.2f" rather than "%0.2f": the 0 flag is illegal without a width.
// "%s" rather than "%d" for the job id: jobIdString is a String.
// Either mistake makes format() throw, so the error would never be logged.
logger.error("Unable to read status for %.2f minutes: job id %s: %s".format(unknownStatusMillis/(60 * 1000D), jobIdString, function.description))
}
}
override def cleanup() {

View File

@ -44,9 +44,9 @@ trait JobManager[TFunction <: QFunction, TRunner <: JobRunner[TFunction]] {
/**
* Updates the status on a list of functions.
* @param runners Runners to update.
* @return runners which were updated.
*/
def updateStatus(runners: Set[TRunner]) {
}
def updateStatus(runners: Set[TRunner]): Set[TRunner] = Set.empty
/**
* Stops a list of functions.

View File

@ -52,6 +52,11 @@ trait JobRunner[TFunction <: QFunction] {
*/
def status: RunnerStatus.Value
/**
* Checks if the status has been unknown for an extended period of time.
*/
def checkUnknownStatus() {}
/**
* Returns the function to be run.
*/

View File

@ -1005,7 +1005,10 @@ class QGraph extends Logging {
.asInstanceOf[Set[JobRunner[QFunction]]]
if (managerRunners.size > 0)
try {
manager.updateStatus(managerRunners)
val updatedRunners = manager.updateStatus(managerRunners)
for (runner <- managerRunners.diff(updatedRunners)) {
runner.checkUnknownStatus()
}
} catch {
case e => /* ignore */
}

View File

@ -40,12 +40,7 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine
/** Job Id of the currently executing job. */
private var jobId: String = _
/** Last known status */
private var lastStatus: RunnerStatus.Value = _
/** The last time the status was updated */
protected var lastStatusUpdate: Long = _
override def jobIdString = jobId
def start() {
GridEngineJobRunner.gridEngineSession.synchronized {
@ -82,11 +77,14 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine
nativeSpecString += " -q " + function.jobQueue
}
// If the memory limit is set (GB) specify the memory limit
if (function.memoryLimit.isDefined) {
val memAvl: String = function.memoryLimit.get + "G"
val memMax: String = (function.memoryLimit.get * 1.2 * 1024).ceil.toInt + "M"
nativeSpecString += " -l mem_free=" + memAvl + ",h_rss=" + memMax
// If the resident set size is requested pass on the memory request
if (residentRequestMB.isDefined) {
nativeSpecString += " -l mem_free=%dM".format(residentRequestMB.get.ceil.toInt)
}
// If the resident set size limit is defined specify the memory limit
if (residentLimitMB.isDefined) {
nativeSpecString += " -l h_rss=%dM".format(residentLimitMB.get.ceil.toInt)
}
// If the priority is set (user specified Int) specify the priority
@ -121,21 +119,11 @@ class GridEngineJobRunner(val function: CommandLineFunction) extends CommandLine
logger.info("Submitted Grid Engine job id: " + jobId)
}
}
def status = this.lastStatus
private def updateStatus(updatedStatus: RunnerStatus.Value) {
this.lastStatus = updatedStatus
this.lastStatusUpdate = System.currentTimeMillis
}
}
object GridEngineJobRunner extends Logging {
private val gridEngineSession = SessionFactory.getFactory.getSession
/** Amount of time a job can go without status before giving up. */
private val unknownStatusMaxSeconds = 5 * 60
initGridEngine()
/**
@ -156,16 +144,14 @@ object GridEngineJobRunner extends Logging {
/**
* Updates the status of a list of jobs.
* @param runners Runners to update.
* @return runners which were updated.
*/
def updateStatus(runners: Set[GridEngineJobRunner]) {
def updateStatus(runners: Set[GridEngineJobRunner]) = {
var updatedRunners = Set.empty[GridEngineJobRunner]
gridEngineSession.synchronized {
runners.foreach(runner => if (updateRunnerStatus(runner)) {updatedRunners += runner})
}
for (runner <- runners.diff(updatedRunners)) {
checkUnknownStatus(runner)
}
updatedRunners
}
/**
@ -219,20 +205,11 @@ object GridEngineJobRunner extends Logging {
logger.warn("Unable to determine status of Grid Engine job id " + runner.jobId, de)
}
Option(returnStatus) match {
case Some(returnStatus) =>
runner.updateStatus(returnStatus)
return true
case None => return false
}
}
private def checkUnknownStatus(runner: GridEngineJobRunner) {
val unknownStatusSeconds = (System.currentTimeMillis - runner.lastStatusUpdate)
if (unknownStatusSeconds > (unknownStatusMaxSeconds * 1000L)) {
// Unknown status has been returned for a while now.
runner.updateStatus(RunnerStatus.FAILED)
logger.error("Unable to read Grid Engine status for %d minutes: job id %d: %s".format(unknownStatusSeconds/60, runner.jobId, runner.function.description))
if (returnStatus != null) {
runner.updateStatus(returnStatus)
true
} else {
false
}
}

View File

@ -34,6 +34,6 @@ class Lsf706JobManager extends CommandLineJobManager[Lsf706JobRunner] {
def runnerType = classOf[Lsf706JobRunner]
def create(function: CommandLineFunction) = new Lsf706JobRunner(function)
override def updateStatus(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.updateStatus(runners) }
override def updateStatus(runners: Set[Lsf706JobRunner]) = { Lsf706JobRunner.updateStatus(runners) }
override def tryStop(runners: Set[Lsf706JobRunner]) { Lsf706JobRunner.tryStop(runners) }
}

View File

@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.Utils
import org.broadinstitute.sting.jna.clibrary.LibC
import org.broadinstitute.sting.jna.lsf.v7_0_6.LibBat.{submitReply, submit}
import com.sun.jna.ptr.IntByReference
import com.sun.jna.{StringArray, NativeLong}
import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner}
import com.sun.jna.{Structure, StringArray, NativeLong}
/**
* Runs jobs on an LSF compute cluster.
@ -45,12 +45,7 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
/** Job Id of the currently executing job. */
private var jobId = -1L
/** Last known status */
private var lastStatus: RunnerStatus.Value = _
/** The last time the status was updated */
protected var lastStatusUpdate: Long = _
override def jobIdString = jobId.toString
/**
* Dispatches the function on the LSF cluster.
@ -85,12 +80,19 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
request.options |= LibBat.SUB_QUEUE
}
// If the memory limit is set (GB) specify the memory limit
if (function.memoryLimit.isDefined) {
request.resReq = "rusage[mem=" + function.memoryLimit.get + "]"
// If the resident set size is requested pass on the memory request
if (residentRequestMB.isDefined) {
val memInUnits = Lsf706JobRunner.convertUnits(residentRequestMB.get)
request.resReq = "select[mem>%1$d] rusage[mem=%1$d]".format(memInUnits)
request.options |= LibBat.SUB_RES_REQ
}
// If the resident set size limit is defined specify the memory limit
if (residentLimitMB.isDefined) {
val memInUnits = Lsf706JobRunner.convertUnits(residentLimitMB.get)
request.rLimits(LibLsf.LSF_RLIMIT_RSS) = memInUnits
}
// If the priority is set (user specified Int) specify the priority
if (function.jobPriority.isDefined) {
request.userPriority = function.jobPriority.get
@ -122,11 +124,13 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
}
}
def status = this.lastStatus
private def updateStatus(updatedStatus: RunnerStatus.Value) {
this.lastStatus = updatedStatus
this.lastStatusUpdate = System.currentTimeMillis
override def checkUnknownStatus() {
// TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs.
// Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(jobId))
super.checkUnknownStatus()
}
}
@ -137,17 +141,8 @@ object Lsf706JobRunner extends Logging {
/** Number of seconds for a non-normal exit status before we give up on expecting LSF to retry the function. */
private val retryExpiredSeconds = 5 * 60
/** Amount of time a job can go without status before giving up. */
private val unknownStatusMaxSeconds = 5 * 60
initLsf()
/** The name of the default queue. */
private var defaultQueue: String = _
/** The run limits for each queue. */
private var queueRlimitRun = Map.empty[String,Int]
/**
* Initialize the Lsf library.
*/
@ -161,8 +156,9 @@ object Lsf706JobRunner extends Logging {
/**
* Bulk updates job statuses.
* @param runners Runners to update.
* @return runners which were updated.
*/
def updateStatus(runners: Set[Lsf706JobRunner]) {
def updateStatus(runners: Set[Lsf706JobRunner]) = {
var updatedRunners = Set.empty[Lsf706JobRunner]
Lsf706JobRunner.lsfLibLock.synchronized {
@ -192,70 +188,7 @@ object Lsf706JobRunner extends Logging {
}
}
for (runner <- runners.diff(updatedRunners)) {
checkUnknownStatus(runner)
}
}
/**
* Tries to stop any running jobs.
* @param runners Runners to stop.
*/
def tryStop(runners: Set[Lsf706JobRunner]) {
lsfLibLock.synchronized {
// lsb_killbulkjobs does not seem to forward SIGTERM,
// only SIGKILL, so send the Ctrl-C (SIGTERM) one by one.
for (runner <- runners.filterNot(_.jobId < 0)) {
try {
if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0)
logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId))
} catch {
case e =>
logger.error("Unable to kill job " + runner.jobId, e)
}
}
}
}
/**
* Returns the run limit in seconds for the queue.
* If the queue name is null returns the length of the default queue.
* @param queue Name of the queue or null for the default queue.
* @return the run limit in seconds for the queue.
*/
private def getRlimitRun(queue: String) = {
lsfLibLock.synchronized {
if (queue == null) {
if (defaultQueue != null) {
queueRlimitRun(defaultQueue)
} else {
// Get the info on the default queue.
val numQueues = new IntByReference(1)
val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0)
if (queueInfo == null)
throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue"))
defaultQueue = queueInfo.queue
val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN)
queueRlimitRun += defaultQueue -> limit
limit
}
} else {
queueRlimitRun.get(queue) match {
case Some(limit) => limit
case None =>
// Cache miss. Go get the run limits from LSF.
val queues = new StringArray(Array[String](queue))
val numQueues = new IntByReference(1)
val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0)
if (queueInfo == null)
throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue))
val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN)
queueRlimitRun += queue -> limit
limit
}
}
}
updatedRunners
}
private def updateRunnerStatus(runner: Lsf706JobRunner, jobInfo: LibBat.jobInfoEnt) {
@ -280,20 +213,6 @@ object Lsf706JobRunner extends Logging {
)
}
private def checkUnknownStatus(runner: Lsf706JobRunner) {
// TODO: Need a second pass through either of the two archive logs using lsb_geteventrecbyline() for disappeared jobs.
// Can also tell if we wake up and the last time we saw status was greater than lsb_parameterinfo().cleanPeriod
// LSB_SHAREDIR/cluster_name/logdir/lsb.acct (man bacct)
// LSB_SHAREDIR/cluster_name/logdir/lsb.events (man bhist)
logger.debug("Job Id %s status / exitStatus / exitInfo: ??? / ??? / ???".format(runner.jobId))
val unknownStatusMillis = (System.currentTimeMillis - runner.lastStatusUpdate)
if (unknownStatusMillis > (unknownStatusMaxSeconds * 1000L)) {
// Unknown status has been returned for a while now.
runner.updateStatus(RunnerStatus.FAILED)
logger.error("Unable to read LSF status for %0.2f minutes: job id %d: %s".format(unknownStatusMillis/(60 * 1000D), runner.jobId, runner.function.description))
}
}
/**
* Returns true if LSF is expected to retry running the function.
* @param exitInfo The reason the job exited.
@ -309,4 +228,86 @@ object Lsf706JobRunner extends Logging {
}
}
}
/**
 * Tries to stop any running jobs by sending SIGTERM to each one.
 * A failure to signal one job is logged and the remaining jobs are still signalled.
 * @param runners Runners to stop.
 */
def tryStop(runners: Set[Lsf706JobRunner]) {
lsfLibLock.synchronized {
// lsb_killbulkjobs does not seem to forward SIGTERM,
// only SIGKILL, so send SIGTERM to each job one by one.
// Runners with a negative job id are skipped (presumably never submitted -- TODO confirm).
for (runner <- runners.filterNot(_.jobId < 0)) {
try {
// A negative return value from lsb_signaljob is reported as an error.
if (LibBat.lsb_signaljob(runner.jobId, SIGTERM) < 0)
logger.error(LibBat.lsb_sperror("Unable to kill job " + runner.jobId))
} catch {
case e =>
logger.error("Unable to kill job " + runner.jobId, e)
}
}
}
}
/** The name of the default queue. */
private lazy val defaultQueue: String = {
lsfLibLock.synchronized {
val numQueues = new IntByReference(1)
val queueInfo = LibBat.lsb_queueinfo(null, numQueues, null, null, 0)
if (queueInfo == null)
throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for the default queue"))
queueInfo.queue
}
}
/** The run limits for each queue. */
private var queueRlimitRun = Map.empty[String,Int]
/**
 * Returns the run limit in seconds for the queue.
 * If the queue name is null returns the run limit of the default queue.
 * Limits are looked up from LSF once per queue and then served from the queueRlimitRun cache.
 * @param queueName Name of the queue or null for the default queue.
 * @return the run limit in seconds for the queue.
 */
private def getRlimitRun(queueName: String) = {
lsfLibLock.synchronized {
val queue = if (queueName == null) defaultQueue else queueName
queueRlimitRun.get(queue) match {
case Some(limit) => limit
case None =>
// Cache miss. Go get the run limits from LSF.
val queues = new StringArray(Array(queue))
val numQueues = new IntByReference(1)
val queueInfo = LibBat.lsb_queueinfo(queues, numQueues, null, null, 0)
if (queueInfo == null)
throw new QException(LibBat.lsb_sperror("Unable to get LSF queue info for queue: " + queue))
val limit = queueInfo.rLimits(LibLsf.LSF_RLIMIT_RUN)
queueRlimitRun += queue -> limit
limit
}
}
}
/**
 * The number of megabytes in one LSF limit unit, read from the
 * LSF_UNIT_FOR_LIMITS configuration parameter. Values in MB are divided by
 * this to express them in the configured unit. An unset parameter is treated as MB.
 */
private lazy val unitDivisor: Double = {
lsfLibLock.synchronized {
// Two entries: the parameter to read plus a trailing entry left unnamed
// (presumably the terminator ls_readconfenv expects -- TODO confirm).
val unitsParam: Array[LibLsf.config_param] = new LibLsf.config_param().toArray(2).asInstanceOf[Array[LibLsf.config_param]]
unitsParam(0).paramName = "LSF_UNIT_FOR_LIMITS"
Structure.autoWrite(unitsParam.asInstanceOf[Array[Structure]])
if (LibLsf.ls_readconfenv(unitsParam(0), null) != 0)
throw new QException(LibBat.lsb_sperror("ls_readconfenv() failed"))
Structure.autoRead(unitsParam.asInstanceOf[Array[Structure]])
unitsParam(0).paramValue match {
case "KB" => 1D / 1024
case "MB" => 1D
case "GB" => 1024D
case "TB" => 1024D * 1024
case "PB" => 1024D * 1024 * 1024
case "EB" => 1024D * 1024 * 1024 * 1024
case null => 1D
// Fail with a descriptive message instead of a bare scala.MatchError.
case other => throw new QException("Unsupported LSF_UNIT_FOR_LIMITS value: " + other)
}
}
}
private def convertUnits(mb: Double) = (mb / unitDivisor).ceil.toInt
}

View File

@ -50,10 +50,10 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu
// Allow advanced users to update the job.
updateJobRun(job)
runStatus = RunnerStatus.RUNNING
updateStatus(RunnerStatus.RUNNING)
job.run()
runStatus = RunnerStatus.DONE
// Reaching this line means job.run() returned normally, so record DONE (the value the replaced line set), not FAILED.
updateStatus(RunnerStatus.DONE)
}
def status = runStatus
override def checkUnknownStatus() {}
}

View File

@ -9,7 +9,7 @@ trait CommandLineFunction extends QFunction with Logging {
def commandLine: String
/** Upper memory limit */
var memoryLimit: Option[Int] = None
var memoryLimit: Option[Double] = None
/** Job project to run the command */
var jobProject: String = _
@ -56,7 +56,7 @@ trait CommandLineFunction extends QFunction with Logging {
if (memoryLimit.isEmpty)
memoryLimit = qSettings.memoryLimit
super.freezeFieldValues
super.freezeFieldValues()
}
/**

View File

@ -47,7 +47,7 @@ trait JavaCommandLineFunction extends CommandLineFunction {
/**
* Memory limit for the java executable, or if None will use the default memoryLimit.
*/
var javaMemoryLimit: Option[Int] = None
var javaMemoryLimit: Option[Double] = None
/**
* Returns the java executable to run.
@ -61,8 +61,8 @@ trait JavaCommandLineFunction extends CommandLineFunction {
null
}
override def freezeFieldValues = {
super.freezeFieldValues
override def freezeFieldValues() {
super.freezeFieldValues()
if (javaMemoryLimit.isEmpty && memoryLimit.isDefined)
javaMemoryLimit = memoryLimit
@ -72,7 +72,7 @@ trait JavaCommandLineFunction extends CommandLineFunction {
}
def javaOpts = "%s -Djava.io.tmpdir=%s"
.format(optional(" -Xmx", javaMemoryLimit, "g"), jobTempDir)
.format(optional(" -Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m"), jobTempDir)
def commandLine = "java%s %s"
.format(javaOpts, javaExecutable)

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
class HelloWorldPipelineTest {
@Test
def testHelloWorld {
def testHelloWorld() {
val spec = new PipelineTestSpec
spec.name = "HelloWorld"
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala"
@ -37,15 +37,23 @@ class HelloWorldPipelineTest {
}
@Test
def testHelloWorldWithPrefix {
def testHelloWorldWithPrefix() {
val spec = new PipelineTestSpec
spec.name = "HelloWorldWithPrefix"
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPrefix HelloWorld"
PipelineTest.executeTest(spec)
}
/** Runs the HelloWorld example script with a fractional (1.25 GB) default memory limit. */
@Test
def testHelloWorldWithMemoryLimit() {
val spec = new PipelineTestSpec
// Use a name unique to this test; it previously reused "HelloWorldWithPrefix" from the test above.
spec.name = "HelloWorldWithMemoryLimit"
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -memLimit 1.25"
PipelineTest.executeTest(spec)
}
@Test(enabled=false)
def testHelloWorldWithPriority {
def testHelloWorldWithPriority() {
val spec = new PipelineTestSpec
spec.name = "HelloWorldWithPriority"
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala -jobPriority 100"