From 542d394e09643d4d349cab1fabdb7e97022b245e Mon Sep 17 00:00:00 2001 From: kshakir Date: Fri, 13 Aug 2010 15:54:08 +0000 Subject: [PATCH] Cleaning up Queue debugging output. -l DEBUG with local programs now prints out the stdout/stderr of the programs as they are run. More documentation in the examples with a new even simpler CountReads example. Took out unused option to build Queue GATK extensions separately. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4025 348d0f76-0448-11de-a6fe-93d51630548a --- build.xml | 30 ++----- .../sting/commandline/CommandLineProgram.java | 48 ++++++----- packages/Queue.xml | 4 + packages/QueueGenomeAnalysisTK.xml | 16 ---- scala/qscript/UnifiedGenotyperExample.scala | 56 ------------- .../qscript/examples/ExampleCountReads.scala | 63 +++++++++++++++ .../examples/ExampleUnifiedGenotyper.scala | 80 +++++++++++++++++++ .../sting/queue/QCommandLine.scala | 1 + .../sting/queue/QScriptManager.scala | 12 +-- .../sting/queue/engine/QGraph.scala | 35 ++++---- .../sting/queue/util/ProcessController.scala | 12 ++- .../sting/queue/util/ShellJob.scala | 13 +-- 12 files changed, 219 insertions(+), 151 deletions(-) delete mode 100644 packages/QueueGenomeAnalysisTK.xml delete mode 100644 scala/qscript/UnifiedGenotyperExample.scala create mode 100644 scala/qscript/examples/ExampleCountReads.scala create mode 100644 scala/qscript/examples/ExampleUnifiedGenotyper.scala diff --git a/build.xml b/build.xml index de1ef54f1..038c4f916 100644 --- a/build.xml +++ b/build.xml @@ -116,7 +116,6 @@ - @@ -144,35 +143,22 @@ - - - - - - - - - - - - + - - @@ -225,7 +211,7 @@ - + Generating Queue GATK extensions... @@ -236,7 +222,7 @@ - + Building Queue GATK extensions... @@ -326,7 +312,7 @@ - + @@ -335,9 +321,7 @@ - - @@ -345,7 +329,7 @@ - + @@ -397,9 +381,7 @@ - - @@ -407,7 +389,7 @@ - + diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index 89fd143e2..3c31e5443 100644 --- a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -153,6 +153,16 @@ public abstract class CommandLineProgram { // setup our log layout PatternLayout layout = new PatternLayout(); + // now set the layout of all the loggers to our layout + Enumeration en = logger.getAllAppenders(); + for (; en.hasMoreElements();) { + Appender app = en.nextElement(); + app.setLayout(layout); + } + + // Initialize the logger using the defaults. + clp.setupLoggerLevel(layout); + // setup the parser ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); @@ -170,6 +180,9 @@ public abstract class CommandLineProgram { ParsingEngine.ValidationType.InvalidArgument)); parser.loadArgumentsIntoObject(clp); + // Initialize the logger using the loaded command line. + clp.setupLoggerLevel(layout); + Class[] argumentSources = clp.getArgumentSources(); for (Class argumentSource : argumentSources) parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); @@ -187,21 +200,9 @@ public abstract class CommandLineProgram { parser.validate(); parser.loadArgumentsIntoObject(clp); - } - // if we're in debug mode, set the mode up - if (clp.debugMode) { - //logger.info("Setting debug"); - layout.setConversionPattern(debugPatternString); - } else { - //logger.info("not Setting debug"); - layout.setConversionPattern(patternString); - } - // now set the layout of all the loggers to our layout - Enumeration en = logger.getAllAppenders(); - for (; en.hasMoreElements();) { - Appender app = en.nextElement(); - app.setLayout(layout); + // Initialize the logger using the loaded command line. + clp.setupLoggerLevel(layout); } // if they set the mode to quiet @@ -226,9 +227,6 @@ public abstract class CommandLineProgram { } } - // set the default logger level - clp.setupLoggerLevel(); - // regardless of what happens next, generate the header information HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); @@ -283,8 +281,20 @@ public abstract class CommandLineProgram { /** * this function checks the logger level passed in on the command line, taking the lowest * level that was provided. + * @param layout Pattern layout to format based on the logger level. */ - private void setupLoggerLevel() { + @SuppressWarnings("unchecked") + private void setupLoggerLevel(PatternLayout layout) { + // if we're in debug mode, set the mode up + if (debugMode) { + //logger.info("Setting debug"); + layout.setConversionPattern(debugPatternString); + } else { + //logger.info("not Setting debug"); + layout.setConversionPattern(patternString); + } + + // set the default logger level Level par; if (logging_level.toUpperCase().equals("DEBUG")) { par = Level.DEBUG; @@ -385,7 +395,7 @@ public abstract class CommandLineProgram { * @param e the exception */ public void generateErrorLog(PrintStream stream, Exception e) { - stream.println(e.getStackTrace().toString()); + e.printStackTrace(stream); } /** diff --git a/packages/Queue.xml b/packages/Queue.xml index 50b6dcd3b..951064070 100644 --- a/packages/Queue.xml +++ b/packages/Queue.xml @@ -1,5 +1,6 @@ + @@ -9,6 +10,9 @@ + + + diff --git a/packages/QueueGenomeAnalysisTK.xml b/packages/QueueGenomeAnalysisTK.xml deleted file mode 100644 index 0736f3462..000000000 --- a/packages/QueueGenomeAnalysisTK.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/scala/qscript/UnifiedGenotyperExample.scala b/scala/qscript/UnifiedGenotyperExample.scala deleted file mode 100644 index e44a3505f..000000000 --- a/scala/qscript/UnifiedGenotyperExample.scala +++ /dev/null @@ -1,56 +0,0 @@ -import org.broadinstitute.sting.queue.extensions.gatk._ -import org.broadinstitute.sting.queue.QScript - -class UnifiedGenotyperExample extends QScript { - qscript => - - @Input(doc="gatk jar file") - var gatkJar: File = _ - - @Input(doc="bam files", shortName="I") - var bamFiles: List[File] = Nil - - @Input(doc="interval list", shortName="L") - var intervals: File = _ - - @Input(doc="referenceFile", shortName="R") - var referenceFile: File = _ - - @Argument(doc="filter names", shortName="filter") - var filterNames: List[String] = Nil - - @Argument(doc="filter expressions", shortName="filterExpression") - var filterExpressions: List[String] = Nil - - @Argument(doc="job queue", shortName="queue", required=false) - var jobQueue = "broad" - - trait UnifiedGenotyperArguments extends CommandLineGATK { - this.jobQueue = qscript.jobQueue - this.jarFile = qscript.gatkJar - this.intervals = qscript.intervals - this.reference_sequence = qscript.referenceFile - } - - def script = { - for (bam <- bamFiles) { - val ug = new UnifiedGenotyper with UnifiedGenotyperArguments - val vf = new VariantFiltration with UnifiedGenotyperArguments - val ve = new VariantEval with UnifiedGenotyperArguments - - // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh. - ug.scatterCount = 3 - ug.cleanupTempDirectories = true - ug.input_file :+= bam - ug.out = swapExt(bam, "bam", "unfiltered.vcf") - - vf.rodBind :+= RodBind("vcf", "VCF", ug.out) - vf.out = swapExt(bam, "bam", "filtered.vcf") - - ve.rodBind :+= RodBind("vcf", "VCF", vf.out) - ve.out = swapExt(bam, "bam", "eval") - - add(ug, vf, ve) - } - } -} diff --git a/scala/qscript/examples/ExampleCountReads.scala b/scala/qscript/examples/ExampleCountReads.scala new file mode 100644 index 000000000..c02362fa5 --- /dev/null +++ b/scala/qscript/examples/ExampleCountReads.scala @@ -0,0 +1,63 @@ +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ + +/** + * An introductory pipeline for Queue. + * Runs the GATK CountReads individually and across a set of bams. + * All bams must have the same reference. + */ +class ExampleCountReads extends QScript { + @Input(doc="The path to the GenomeAnalysisTK.jar file.", shortName="gatk") + var gatkJar: File = null + + @Input(doc="The reference file for the bam files.", shortName="R") + var referenceFile: File = null + + // NOTE: Do not initialize List, Set, or Option to null + // as you won't be able to update the collection. + // By default set: + // List[T] = Nil + // Set[T] = Set.empty[T] + // Option[T] = None + @Input(doc="One or more bam files.", shortName="I") + var bamFiles: List[File] = Nil + + /** + * In script, you create and then add() functions to the pipeline. + */ + def script = { + + // Run CountReads for all bams jointly. + + // Create a new CountReads from the Queue GATK Extensions. + // The names of walkers are the same as you would use for '-T ' + val jointCountReads = new CountReads + + // Set the GATK jar file for this command. + jointCountReads.jarFile = gatkJar + + // Each field in the extensions is based off of the full form of the arguments. + // To get the list of arguments and their descriptions run + // java -jar -T -help + jointCountReads.reference_sequence = referenceFile + + // GATK inputs that take more than one file will have a singular name which + // matches the full form of the argument, but will actually be a scala List[] + jointCountReads.input_file = bamFiles + + // Add the newly created function to the pipeline. + add(jointCountReads) + + // If there is more than one BAM, also run CountReads once for each bam. + if (bamFiles.size > 1) { + for (bamFile <- bamFiles) { + val singleCountReads = new CountReads + singleCountReads.jarFile = gatkJar + singleCountReads.reference_sequence = referenceFile + // ':+' is the scala List append operator + singleCountReads.input_file :+= bamFile + add(singleCountReads) + } + } + } +} diff --git a/scala/qscript/examples/ExampleUnifiedGenotyper.scala b/scala/qscript/examples/ExampleUnifiedGenotyper.scala new file mode 100644 index 000000000..f320cdcda --- /dev/null +++ b/scala/qscript/examples/ExampleUnifiedGenotyper.scala @@ -0,0 +1,80 @@ +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ + +/** + * An example building on the intro ExampleCountReads.scala. + * Runs an INCOMPLETE version of the UnifiedGenotyper with VariantEval and optional VariantFiltration. + * If run on a compute cluster, splits the UnifiedGenotyper into 3 parts. + */ +class ExampleUnifiedGenotyper extends QScript { + // Create an alias 'qscript' to be able to access variables + // in the ExampleUnifiedGenotyper. + // 'qscript' is now the same as 'ExampleUnifiedGenotyper.this' + qscript => + + + // Required arguments. All initialized to empty values. + + @Input(doc="The path to the GenomeAnalysisTK.jar file.", shortName="gatk") + var gatkJar: File = null // The command line must pass the gatk jar to this script via -gatk. + + @Input(doc="The reference file for the bam files.", shortName="R") + var referenceFile: File = _ // _ is scala shorthand for null + + @Input(doc="Bam file to genotype.", shortName="I") + var bamFile: File = _ + + // The following arguments are all optional. + + @Input(doc="An optional file with a list of intervals to proccess.", shortName="L", required=false) + var intervals: File = _ + + @Argument(doc="A optional list of filter names.", shortName="filter", required=false) + var filterNames: List[String] = Nil // Nil is an empty List, versus null which means a non-existent List. + + @Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false) + var filterExpressions: List[String] = Nil + + + // This trait allows us set the variables below in one place, + // and then reuse this trait on each CommandLineGATK function below. + trait UnifiedGenotyperArguments extends CommandLineGATK { + this.jarFile = qscript.gatkJar + this.reference_sequence = qscript.referenceFile + this.intervals = qscript.intervals + // Some() is how you set the value for an scala Option. + // Set the memory limit to 2 gigabytes on each command. + this.memoryLimit = Some(2) + } + + + def script = { + // Create the four function that we can run. + val genotyper = new UnifiedGenotyper with UnifiedGenotyperArguments + val variantFilter = new VariantFiltration with UnifiedGenotyperArguments + val evalUnfiltered = new VariantEval with UnifiedGenotyperArguments + val evalFiltered = new VariantEval with UnifiedGenotyperArguments + + // If you are running this on a compute farm, make sure that the Sting/shell + // folder is in your path to use mergeText.sh and splitIntervals.sh. + genotyper.scatterCount = 3 + genotyper.input_file :+= qscript.bamFile + genotyper.variants_out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf") + + evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out) + evalUnfiltered.out = swapExt(genotyper.variants_out, "vcf", "eval") + + variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out) + variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf") + variantFilter.filterName = filterNames + variantFilter.filterExpression = filterExpressions + + evalFiltered.rodBind :+= RodBind("vcf", "VCF", variantFilter.out) + evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval") + + add(genotyper, evalUnfiltered) + // Only add variant filtration to the pipeline if filters were passed in + if (filterNames.size > 0) + add(variantFilter, evalFiltered) + } +} diff --git a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index 34e2f09bd..78956e8bc 100755 --- a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -51,6 +51,7 @@ class QCommandLine extends CommandLineProgram with Logging { qGraph.dotFile = dotFile qGraph.expandedDotFile = expandedDotFile qGraph.qSettings = qSettings + qGraph.debugMode = debugMode == true val scripts = qScriptManager.createScripts() for (script <- scripts) { diff --git a/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala index 1a8fdeff9..4873ccf77 100644 --- a/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala +++ b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala @@ -21,9 +21,9 @@ class QScriptManager extends PluginManager[QScript](classOf[QScript], "QScript", * @return QScripts classes found in the classpath. */ def getValues = { - if (logger.isTraceEnabled) { - logger.trace(JavaConversions.asMap(this.pluginsByName) - .foreach{case (name, clazz) => "Found QScript %s: %s".format(name, clazz)}) + if (logger.isDebugEnabled) { + JavaConversions.asMap(this.pluginsByName) + .foreach{case (name, clazz) => logger.debug("Found QScript %s: %s".format(name, clazz))} } JavaConversions.asIterable(this.pluginsByName.values).toArray } @@ -59,8 +59,8 @@ object QScriptManager extends Logging { val compiler = new Global(settings, reporter) val run = new compiler.Run - logger.debug("Compiling %s QScript%s".format(scripts.size, plural(scripts.size))) - logger.trace("Compilation directory: " + settings.outdir.value) + logger.info("Compiling %s QScript%s".format(scripts.size, plural(scripts.size))) + logger.debug("Compilation directory: " + settings.outdir.value) run.compileFiles(scripts.map(new PlainFile(_))) reporter.printSummary() @@ -73,7 +73,7 @@ object QScriptManager extends Logging { logger.warn("Compile succeeded with %d warning%s".format( reporter.WARNING.count, plural(reporter.WARNING.count))) else - logger.debug("Compilation complete") + logger.info("Compilation complete") // Add the new compilation output directory to the classpath. ClasspathUtils.addClasspath(outdir) diff --git a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index c5e590c9e..f33831163 100755 --- a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -25,6 +25,7 @@ class QGraph extends Logging { var dotFile: File = _ var expandedDotFile: File = _ var qSettings: QSettings = _ + var debugMode = false private val jobGraph = newGraph /** @@ -55,8 +56,8 @@ class QGraph extends Logging { var addedFunctions = List.empty[CommandLineFunction] for (scatterGather <- scatterGathers) { val functions = scatterGather.generateFunctions() - if (logger.isTraceEnabled) - logger.trace("Scattered into %d parts: %n%s".format(functions.size, functions.mkString("%n".format()))) + if (this.debugMode) + logger.debug("Scattered into %d parts: %n%s".format(functions.size, functions.mkString("%n".format()))) addedFunctions ++= functions } @@ -157,7 +158,7 @@ class QGraph extends Logging { } /** - * Removes mapping edges that aren't being used, and nodes that don't belong to anything. + * Removes mapping edges that aren't being used, and nodes that don't belong to anything. */ private def prune = { var pruning = true @@ -211,9 +212,9 @@ class QGraph extends Logging { val numJobs = JavaConversions.asSet(jobGraph.edgeSet).filter(_.isInstanceOf[CommandLineFunction]).size logger.info("Number of jobs: %s".format(numJobs)) - if (logger.isTraceEnabled) { + if (this.debugMode) { val numNodes = jobGraph.vertexSet.size - logger.trace("Number of nodes: %s".format(numNodes)) + logger.debug("Number of nodes: %s".format(numNodes)) } var numNodes = 0 @@ -221,14 +222,14 @@ class QGraph extends Logging { edgeFunction = { case f => runner.run(f, this) }, nodeFunction = { case node => { - if (logger.isTraceEnabled) - logger.trace("Visiting: " + node) + if (this.debugMode) + logger.debug("Visiting: " + node) numNodes += 1 } }) - if (logger.isTraceEnabled) - logger.trace("Done walking %s nodes.".format(numNodes)) + if (this.debugMode) + logger.debug("Done walking %s nodes.".format(numNodes)) if (bsubAllJobs && bsubWaitJobs) { logger.info("Waiting for jobs to complete.") @@ -263,14 +264,14 @@ class QGraph extends Logging { val newTarget = jobGraph.addVertex(outputs) val removedEdges = jobGraph.removeAllEdges(inputs, outputs) val added = jobGraph.addEdge(inputs, outputs, f) - if (logger.isTraceEnabled) { - logger.trace("Mapped from: " + inputs) - logger.trace("Mapped to: " + outputs) - logger.trace("Mapped via: " + f) - logger.trace("Removed edges: " + removedEdges) - logger.trace("New source?: " + newSource) - logger.trace("New target?: " + newTarget) - logger.trace("") + if (this.debugMode) { + logger.debug("Mapped from: " + inputs) + logger.debug("Mapped to: " + outputs) + logger.debug("Mapped via: " + f) + logger.debug("Removed edges: " + removedEdges) + logger.debug("New source?: " + newSource) + logger.debug("New target?: " + newTarget) + logger.debug("") } } catch { case e: Exception => diff --git a/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala index 80162582e..9e278302b 100644 --- a/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala @@ -2,6 +2,7 @@ package org.broadinstitute.sting.queue.util import java.io._ import scala.collection.mutable.{HashSet, ListMap} +import scala.collection.JavaConversions /** * Facade to Runtime.exec() and java.lang.Process. Handles @@ -32,7 +33,7 @@ class ProcessController extends Logging { stderrCapture.start() /** - * Executes a command line program with the settings and waits for it to return, processing the output on a background thread. + * Executes a command line program with the settings and waits for it to return, processing the output on a background thread. * @param settings Settings to be run. * @return The output of the command. */ @@ -58,8 +59,8 @@ class ProcessController extends Logging { val stderrSettings = if (settings.stderrSettings == null) ProcessController.EmptyStreamSettings else settings.stderrSettings toCapture.synchronized { - toCapture.put(ProcessController.STDOUT_KEY, new ProcessController.CapturedStreamOutput(process.getInputStream, stdoutSettings)) - toCapture.put(ProcessController.STDERR_KEY, new ProcessController.CapturedStreamOutput(process.getErrorStream, stderrSettings)) + toCapture.put(ProcessController.STDOUT_KEY, new ProcessController.CapturedStreamOutput(process.getInputStream, stdoutSettings, scala.Console.out)) + toCapture.put(ProcessController.STDERR_KEY, new ProcessController.CapturedStreamOutput(process.getErrorStream, stderrSettings, scala.Console.err)) toCapture.notifyAll() } @@ -275,7 +276,7 @@ object ProcessController extends Logging { * @param stream Stream to capture output. * @param settings Settings that define what to capture. */ - private class CapturedStreamOutput(val stream: InputStream, val settings: OutputStreamSettings) extends StreamOutput { + private class CapturedStreamOutput(val stream: InputStream, val settings: OutputStreamSettings, val debugStream: PrintStream) extends StreamOutput { /** * Returns the captured content as a string. * @return The captured content as a string. @@ -318,6 +319,9 @@ object ProcessController extends Logging { * @param len Number of characters in the buffer. */ private def writeString(chars: Array[Char], len: Int) = { + // If debug is enabled bypass the logger and dump directly to the screen + if (logger.isDebugEnabled) + debugStream.print(new String(chars, 0, len)) if (settings.stringSize < 0) { stringWriter.write(chars, 0, len) } else { diff --git a/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala index e4f8f2899..b9458f4c6 100755 --- a/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala @@ -13,7 +13,7 @@ class ShellJob extends CommandLineJob with Logging { assert(command != null, "Command was not set on job") val (redirectError, errorFile) = if (this.errorFile == null) (true, null) else (false, this.errorFile) - val bufferSize = if (logger.isDebugEnabled) FIVE_MB else 0 + val bufferSize = if (redirectError || logger.isDebugEnabled) FIVE_MB else 0 val stdinSettings = new ProcessController.InputStreamSettings(null, this.inputFile) val stdoutSettings = new ProcessController.OutputStreamSettings(bufferSize, this.outputFile, true) val stderrSettings = new ProcessController.OutputStreamSettings(FIVE_MB, errorFile, true) @@ -22,15 +22,10 @@ class ShellJob extends CommandLineJob with Logging { val output = processController.exec(processSettings) - if (logger.isDebugEnabled) { - logger.debug("output: " + content(output.stdout)) - logger.debug("error: " + content(output.stderr)) - logger.debug("Command exited with result: " + output.exitValue) - } - if (output.exitValue != 0) { - logger.error("Failed to run job, got exit code %s. Standard error contained: %n%s" - .format(output.exitValue, content(output.stderr))) + val streamOutput = if (redirectError) output.stdout else output.stderr + logger.error("Failed to run job, got exit code %s. Error contained: %n%s" + .format(output.exitValue, content(streamOutput))) throw new QException("Failed to run job, got exit code %s.".format(output.exitValue)) } }