Changed the default logging level to INFO instead of WARN.

Changes to StingUtils command line for use in Queue, replacing Queue's use of property files.
Updates to walkers used in existing QScripts to add @Input/@Output.
RMD used in @Required/@Allows now has a new default equal to "any" type.
New QueueGATKExtensions.jar generator for auto wrapping walkers as Queue CommandLineFunctions.
Added hooks to modify the functions that perform the Scattering and Gathering (setting their jar files, other arguments, etc.)
Removed dependency on BroadCore by porting LSF job submitter to scala.
Ivy now pulls down module dependencies from maven.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3984 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-08-09 16:42:48 +00:00
parent 30178c05c5
commit 4f51a02dea
90 changed files with 4533 additions and 2052 deletions

282
build.xml
View File

@ -7,7 +7,14 @@
<property name="java.source.dir" value="java/src" />
<property name="java.classes" value="${build.dir}/java/classes" />
<property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="queue.source.dir" value="scala/src" />
<property name="queue.classes" value="${build.dir}/scala/classes" />
<property name="queue-gatk-extensions.source.dir" value="${build.dir}/queue-gatk-extensions/src" />
<property name="queue-gatk-extensions.classes" value="${build.dir}/queue-gatk-extensions/classes" />
<!-- where to find the tribble distro -->
<property name="tribble.dir" value="tribble" />
@ -24,9 +31,7 @@
<!-- If running the 'package' task, this property controls the name of the xml file to package -->
<property name="executable" value="GenomeAnalysisTK" />
<!-- Set target based on STING_BUILD_TYPE environment variable -->
<property environment="env"/>
<property name="target" value="${env.STING_BUILD_TYPE}" />
<dirset id="java.source.files" dir="${java.source.dir}">
<patternset>
@ -55,21 +60,28 @@
</fileset>
</path>
<path id="classpath">
<path refid="runtime.dependencies" />
<pathelement location="${java.classes}" />
</path>
<!-- ivy properties -->
<property name="ivy.install.version" value="2.0.0"/>
<property name="ivy.home" value="${user.home}/.ant"/>
<property name="ivy.jar.dir" value="${ivy.home}/lib"/>
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
<property name="ivy.settings.dir" value="settings"/>
<property file="${ivy.settings.dir}/ivysettings.properties"/>
<property name="ivy.conf" value="default"/>
<target name="resolve" depends="init"
description="locate and download library dependencies">
<!-- ivy properties -->
<property name="ivy.install.version" value="2.0.0"/>
<property name="ivy.home" value="${user.home}/.ant"/>
<property name="ivy.jar.dir" value="${ivy.home}/lib"/>
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
<property name="ivy.settings.dir" value="settings"/>
<property file="${ivy.settings.dir}/ivysettings.properties"/>
<property name="gatk.ivy.conf" value="default" />
<property name="scala.ivy.conf" value="default" />
<!-- Remove the reflections cache as the local dependency settings have changed. -->
<!-- Ok to remove after reflections goes back to a stable release. -->
<delete dir="${ivy.home}/cache/org.reflections/reflections"/>
<condition property="queue.ivy.conf" value="queue" else="default">
<isset property="queue.include" />
</condition>
<property name="ivy.conf" value="default, ${gatk.ivy.conf}, ${scala.ivy.conf}, ${queue.ivy.conf}"/>
<mkdir dir="${ivy.jar.dir}"/>
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}"
@ -91,48 +103,144 @@
<format property="build.timestamp" pattern="yyyy/MM/dd HH:mm:ss"/>
</tstamp>
<echo message="Compiling module: ${target}"/>
<!-- Set target based on STING_BUILD_TYPE environment variable -->
<condition property="sting.target" value="core" else="${env.STING_BUILD_TYPE}">
<equals arg1="${env.STING_BUILD_TYPE}" arg2="$${env.STING_BUILD_TYPE}" />
</condition>
<!-- Get the gatk build target. Default to the STING_BUILD_TYPE. -->
<condition property="gatk.target" value="${sting.target}" else="${env.GATK_BUILD_TYPE}">
<equals arg1="${env.GATK_BUILD_TYPE}" arg2="$${env.GATK_BUILD_TYPE}" />
</condition>
<!-- Get the queue build target. Default to none. -->
<condition property="queue.target" value="none" else="${env.QUEUE_BUILD_TYPE}">
<equals arg1="${env.QUEUE_BUILD_TYPE}" arg2="$${env.QUEUE_BUILD_TYPE}" />
</condition>
<!-- Get the queue-gatk-extensions build target. Default to the queue target. -->
<condition property="queue-gatk-extensions.target" value="${queue.target}" else="${env.QUEUE_GATK_EXTENSIONS_BUILD_TYPE}">
<equals arg1="${env.QUEUE_GATK_EXTENSIONS_BUILD_TYPE}" arg2="$${env.QUEUE_GATK_EXTENSIONS_BUILD_TYPE}" />
</condition>
<!-- If the queue-gatk-extensions target is set, include all queue-gatk-extensions tasks. -->
<condition property="queue-gatk-extensions.include">
<not><equals arg1="${queue-gatk-extensions.target}" arg2="none" /></not>
</condition>
<!-- If the queue target is set, or if the queue-gatk-extensions needs to be built, then include all queue tasks. -->
<condition property="queue.include">
<or>
<not><equals arg1="${queue.target}" arg2="none" /></not>
<isset property="queue-gatk-extensions.include" />
</or>
</condition>
<!-- If queue or queue-gatk-extensions are being built, then include scala tasks (init.scalatasks) -->
<condition property="scala.include">
<or>
<isset property="queue.include" />
<isset property="queue-gatk-extensions.include" />
</or>
</condition>
<echo message="GATK build : ${gatk.target}"/>
<echo message="Queue build : ${queue.target}"/>
<echo message="Queue GATK ext. : ${queue-gatk-extensions.target}"/>
<echo message="source revision : ${build.version}"/>
<echo message="build time : ${build.timestamp}" />
<condition property="include.oneoffs">
<equals arg1="${target}" arg2="oneoffs" casesensitive="false" />
<equals arg1="${gatk.target}" arg2="oneoffs" casesensitive="false" />
</condition>
<condition property="include.playground">
<or>
<equals arg1="${target}" arg2="playground" casesensitive="false"/>
<equals arg1="${target}" arg2="oneoffs" casesensitive="false" />
<equals arg1="${gatk.target}" arg2="playground" casesensitive="false"/>
<equals arg1="${gatk.target}" arg2="oneoffs" casesensitive="false" />
</or>
</condition>
<!-- Create the build directory structure used by compile -->
<mkdir dir="${build.dir}"/>
<mkdir dir="${java.classes}"/>
</target>
<target name="java.compile" depends="tribble,init,resolve"
<target name="init.scalatasks" depends="resolve" if="scala.include"
description="Initializes the scala ant tasks from scala-compiler.jar">
<path id="scala.classpath">
<fileset dir="lib">
<include name="scala-compiler-*.jar"/>
<include name="scala-library-*.jar"/>
</fileset>
</path>
<taskdef resource="scala/tools/ant/antlib.xml">
<classpath refid="scala.classpath"/>
</taskdef>
</target>
<target name="gatk.compile" depends="tribble,init,resolve"
description="compile the source">
<!-- Compile the java code from ${src} into build -->
<javac srcdir="${java.source.dir}" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="runtime.dependencies">
<exclude name="**/examples/**" />
<exclude name="**/playground/**" unless="include.playground"/>
<exclude name="**/oneoffprojects/**" unless="include.oneoffs"/>
</javac>
</target>
<target name="extracthelp" depends="init,java.compile"
<!-- Queue depends on the gatk since it contains the StingUtils (including CommandLine) -->
<target name="queue.compile" depends="init,resolve,gatk.compile,init.scalatasks" if="queue.include" description="build Queue">
<path id="queue.classpath">
<path refid="runtime.dependencies" />
<pathelement location="${java.classes}" />
</path>
<mkdir dir="${queue.classes}"/>
<echo>Building Queue...</echo>
<scalac srcdir="${queue.source.dir}" destdir="${queue.classes}" classpathref="queue.classpath" deprecation="yes" unchecked="yes">
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
</scalac>
</target>
<!-- NOTE: Extracting help first to avoid "Unable to load help text. Help output will be sparse." warning message. -->
<target name="queue-gatk-extensions.compile" depends="gatk.compile, queue.compile, extracthelp" if="queue-gatk-extensions.include" description="generate GATK modules for Queue">
<mkdir dir="${queue-gatk-extensions.source.dir}"/>
<mkdir dir="${queue-gatk-extensions.classes}"/>
<path id="queue-gatk-extensions.classpath">
<path refid="runtime.dependencies" />
<pathelement location="${resource.path}" />
<pathelement location="${java.classes}" />
<pathelement location="${queue.classes}" />
</path>
<echo>Generating Queue GATK extensions...</echo>
<java fork="true" classname="org.broadinstitute.sting.queue.extensions.gatk.GATKExtensionsGenerator" classpathref="queue-gatk-extensions.classpath">
<arg value="-outDir" />
<arg path="${queue-gatk-extensions.source.dir}" />
</java>
<echo>Building Queue GATK extensions...</echo>
<scalac srcdir="${queue-gatk-extensions.source.dir}" destdir="${queue-gatk-extensions.classes}" classpathref="queue-gatk-extensions.classpath" deprecation="yes" unchecked="yes">
<include name="**/*.scala"/>
</scalac>
</target>
<target name="extracthelp" depends="init,gatk.compile"
description="Extract help key/value pair file from the JavaDoc tags."
unless="disable.help">
<path id="doclet.classpath">
<path refid="runtime.dependencies" />
<pathelement location="${java.classes}" />
</path>
<javadoc doclet="org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet"
docletpathref="classpath"
docletpathref="doclet.classpath"
classpathref="runtime.dependencies"
additionalparam="-build-timestamp &quot;${build.timestamp}&quot; -version-suffix .${build.version} -out ${basedir}/${java.classes}/${resource.file}">
additionalparam="-build-timestamp &quot;${build.timestamp}&quot; -version-suffix .${build.version} -out ${basedir}/${resource.path}">
<packageset refid="java.source.files"/>
</javadoc>
</target>
<target name="dist" depends="java.compile,extracthelp"
description="generate the distribution">
<target name="sting.compile" depends="gatk.compile, queue.compile, queue-gatk-extensions.compile" />
<target name="init.jar" depends="sting.compile,extracthelp">
<mkdir dir="${dist.dir}"/>
<delete>
<fileset dir="${dist.dir}" includes="*.jar" />
@ -141,14 +249,20 @@
<copy todir="${dist.dir}">
<fileset dir="lib" includes="*.jar"/>
</copy>
</target>
<target name="sting-utils.jar" depends="gatk.compile, init.jar">
<jar jarfile="${dist.dir}/StingUtils.jar">
<fileset dir="${java.classes}" includes="**/utils/**/*.class"/>
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
<manifest>
<attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" />
</manifest>
</jar>
</target>
<target name="gatk.jar" depends="gatk.compile, init.jar"
description="generate the GATK distribution">
<jar jarfile="${dist.dir}/GenomeAnalysisTK.jar">
<path refid="gatk.resources"/>
<fileset dir="${java.classes}">
@ -193,12 +307,46 @@
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.vcf.VCFTool" />
</manifest>
</jar>
</target>
<target name="queue.jar" depends="queue.compile, init.jar" if="queue.include">
<jar jarfile="${dist.dir}/Queue.jar">
<fileset dir="${queue.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
</manifest>
</jar>
</target>
<target name="queue-gatk-extensions.jar" depends="queue-gatk-extensions.compile, init.jar" if="queue-gatk-extensions.include">
<jar jarfile="${dist.dir}/QueueGATKExtensions.jar">
<fileset dir="${queue-gatk-extensions.classes}">
<include name="**/*.class" />
</fileset>
</jar>
</target>
<target name="sting.jar" depends="sting-utils.jar, gatk.jar, queue.jar, queue-gatk-extensions.jar" />
<target name="init.manifests" depends="sting.jar">
<pathconvert property="jar.classpath" pathsep=" ">
<flattenmapper/>
<fileset dir="${dist.dir}" includes="*.jar"/>
<filelist files="GATKScala.jar"/>
</pathconvert>
</target>
<target name="sting-utils.manifests" depends="sting-utils.jar, init.manifests">
<jar jarfile="${dist.dir}/StingUtils.jar" update="true">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}"/>
</manifest>
</jar>
</target>
<target name="gatk.manifests" depends="gatk.jar, init.manifests">
<jar jarfile="${dist.dir}/VCFTool.jar" update="true" >
<manifest>
@ -206,12 +354,6 @@
</manifest>
</jar>
<jar jarfile="${dist.dir}/StingUtils.jar" update="true">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}"/>
</manifest>
</jar>
<jar jarfile="${dist.dir}/GenomeAnalysisTK.jar" update="true">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}"/>
@ -232,29 +374,57 @@
</target>
<target name="queue.manifests" depends="queue.jar, init.manifests" if="queue.include">
<jar jarfile="${dist.dir}/Queue.jar" update="true" >
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
<target name="queue-gatk-extensions.manifests" depends="queue-gatk-extensions.jar, init.manifests" if="queue-gatk-extensions.include">
<jar jarfile="${dist.dir}/QueueGATKExtensions.jar" update="true" >
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
<target name="sting.manifests" depends="sting-utils.manifests, gatk.manifests, queue.manifests, queue-gatk-extensions.manifests" />
<target name="dist" depends="sting.manifests" />
<target name="core" description="force a build of the Sting core code">
<antcall target="dist" inheritAll="true">
<param name="target" value="core" />
<param name="sting.target" value="core" />
</antcall>
</target>
<target name="playground" description="force a build of the Sting experimental code">
<antcall target="dist" inheritAll="true">
<param name="target" value="playground" />
<param name="sting.target" value="playground" />
</antcall>
</target>
<target name="oneoffs" description="force a build of the Sting experimental code and one-offs">
<antcall target="dist" inheritAll="true">
<param name="target" value="oneoffs" />
<param name="sting.target" value="oneoffs" />
</antcall>
</target>
<target name="queue" description="force a build of Queue">
<antcall target="dist" inheritAll="true">
<param name="queue.target" value="core" />
</antcall>
</target>
<target name="java.test.compile" depends="oneoffs">
<echo message="Sting: Compiling test cases!"/>
<mkdir dir="${java.test.classes}"/>
<javac destdir="${java.test.classes}" debug="true" optimize="on">
<src path="${java.test.sources}"/>
<exclude name="**/playground/**" unless="include.playground"/>
<exclude name="**/oneoffprojects/**" unless="include.oneoffs"/>
<classpath>
<path refid="runtime.dependencies" />
<pathelement location="${java.classes}"/>
@ -300,46 +470,6 @@
</jar>
</target>
<!-- Queue target -->
<target name="queue" description="build Queue">
<antcall target="resolve">
<param name="ivy.conf" value="queue"/>
</antcall>
<!-- Call dist so jar files are picked up for manifest. -->
<antcall target="dist" />
<property name="queue.source.dir" value="scala/src" />
<property name="queue.classes" value="${build.dir}/scala/classes" />
<path id="queue.classpath">
<path refid="runtime.dependencies" />
<pathelement location="${java.classes}" />
</path>
<taskdef resource="scala/tools/ant/antlib.xml">
<classpath refid="queue.classpath"/>
</taskdef>
<mkdir dir="${queue.classes}"/>
<echo>Building Queue...</echo>
<scalac srcdir="${queue.source.dir}" destdir="${queue.classes}" classpathref="queue.classpath">
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
</scalac>
<pathconvert property="queuejar.classpath" pathsep=" ">
<flattenmapper/>
<fileset dir="${dist.dir}" includes="*.jar"/>
</pathconvert>
<jar jarfile="${dist.dir}/Queue.jar">
<fileset dir="${queue.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
<attribute name="Class-Path" value="${queuejar.classpath}" />
</manifest>
</jar>
</target>
<!-- ***************************************************************************** -->
<!-- *********** Tests and associated tasks ********* -->
<!-- ***************************************************************************** -->

57
ivy.xml
View File

@ -8,46 +8,40 @@
<conf name="findbugs" extends="default" description="the dependencies for Findbugs"/>
</configurations>
<dependencies defaultconf="default">
<dependency org="net.sf" name="sam" rev="latest.integration" conf="default"/>
<dependency org="net.sf" name="picard" rev="latest.integration" conf="default"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration" conf="default"/>
<dependency org="junit" name="junit" rev="4.4" conf="default"/>
<dependency org="log4j" name="log4j" rev="1.2.15" conf="default"/>
<dependency org="colt" name="colt" rev="1.2.0" conf="default"/>
<dependency org="jboss" name="javassist" rev="3.7.ga" conf="default"/>
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4" conf="default"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2" conf="default"/>
<dependency org="net.sf" name="sam" rev="latest.integration"/>
<dependency org="net.sf" name="picard" rev="latest.integration"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
<dependency org="junit" name="junit" rev="4.4"/>
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
<!-- Dependencies for reflections mvn repository -->
<dependency org="org.reflections" name="reflections" rev="0.9.5-svnversion79M_mod2" conf="default"/>
<dependency org="com.google.collections" name="google-collections" rev="0.9" conf="default"/>
<dependency org="javassist" name="javassist" rev="3.8.0.GA"/>
<dependency org="ch.qos.logback" name="logback-core" rev="0.9.9"/>
<dependency org="ch.qos.logback" name="logback-classic" rev="0.9.9"/>
<dependency org="org.slf4j" name="slf4j-api" rev="1.5.6"/>
<dependency org="xml-apis" name="xml-apis" rev="1.0.b2"/>
<dependency org="com.google.collections" name="google-collections" rev="1.0"/>
<dependency org="dom4j" name="dom4j" rev="1.6"/>
<dependency org="org.reflections" name="reflections" rev="0.9.5-svnversion79M_mod2"/>
<!-- Matrix package from math.nist.gov -->
<dependency org="gov.nist" name="Jama" rev="1.0.2" conf="default"/>
<dependency org="gov.nist" name="Jama" rev="1.0.2"/>
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3" conf="default"/>
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<!-- Dependencies for VariantFiltration -->
<!-- <dependency org="commons-jexl" name="commons-jexl" rev="1.1" conf="default"/> -->
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0" conf="default"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1" conf="default"/>
<dependency org="commons-io" name="commons-io" rev="1.3.2" conf="default"/>
<!-- <dependency org="commons-jexl" name="commons-jexl" rev="1.1"/> -->
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="1.3.2"/>
<!-- Dependencies for Queue GATK Extensions code generator living in java/src -->
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<!-- Scala dependencies -->
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.0.RC6" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.0.RC6" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.0" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.0" conf="scala->default"/>
<!-- Queue additional dependencies -->
<dependency org="commons-lang" name="commons-lang" rev="2.5" conf="queue->default"/>
<dependency org="edu.mit.broad" name="broad-core-all" rev="2.8" conf="queue->default"/>
<dependency org="uk.com.robust-it" name="cloning" rev="1.7.1" conf="queue->default" />
<!-- findbug dependencies -->
<dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/>
@ -56,5 +50,8 @@
<dependency org="net.sourceforge.findbugs" name="annotations" rev="1.3.2" conf="default"/>
<dependency org="net.sourceforge.findbugs" name="jsr305" rev="1.3.2" conf="default"/>
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="com.sun.*" />
<exclude org="javax.*" />
</dependencies>
</ivy-module>

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.analyzecovariates;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.*;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
@ -51,7 +52,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
// Command Line Arguments
/////////////////////////////
@Argument(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";

View File

@ -40,6 +40,11 @@ public class ArgumentDefinition {
*/
public final ArgumentIOType ioType;
/**
* The class of the argument.
*/
public final Class argumentType;
/**
* Full name of the argument. Must have a value.
*/
@ -70,6 +75,11 @@ public class ArgumentDefinition {
*/
public final boolean isMultiValued;
/**
* The class of the componentType. Not used for scalars.
*/
public final Class componentType;
/**
* Is this argument hidden from the help system?
*/
@ -93,35 +103,41 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param ioType Whether the argument is an input or an output.
* @param argumentType The class of the field.
* @param fullName Full name for this argument definition.
* @param shortName Short name for this argument definition.
* @param doc Doc string for this argument.
* @param required Whether or not this argument is required.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
* @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param exclusiveOf Whether this command line argument is mutually exclusive of other arguments.
* @param validation A regular expression for command-line argument validation.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( ArgumentIOType ioType,
Class argumentType,
String fullName,
String shortName,
String doc,
boolean required,
boolean isFlag,
boolean isMultiValued,
Class componentType,
boolean isHidden,
String exclusiveOf,
String validation,
List<String> validOptions) {
this.ioType = ioType;
this.argumentType = argumentType;
this.fullName = fullName;
this.shortName = shortName;
this.doc = doc;
this.required = required;
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = exclusiveOf;
this.validation = validation;
@ -131,18 +147,22 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param annotation The annotation on the field.
* @param argumentType The class of the field.
* @param defaultFullName Default full name for this argument definition.
* @param defaultShortName Default short name for this argument definition.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
* @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( Annotation annotation,
Class argumentType,
String defaultFullName,
String defaultShortName,
boolean isFlag,
boolean isMultiValued,
Class componentType,
boolean isHidden,
List<String> validOptions) {
@ -162,13 +182,15 @@ public class ArgumentDefinition {
else
shortName = null;
this.ioType = getIOType(annotation);
this.ioType = ArgumentIOType.getIOType(annotation);
this.argumentType = argumentType;
this.fullName = fullName;
this.shortName = shortName;
this.doc = getDoc(annotation);
this.required = isRequired(annotation, isFlag);
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = getExclusiveOf(annotation);
this.validation = getValidationRegex(annotation);
@ -178,25 +200,31 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param annotation The annotation on the field.
* @param argumentType The class of the field.
* @param fieldName Default full name for this argument definition.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
* @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( Annotation annotation,
Class argumentType,
String fieldName,
boolean isFlag,
boolean isMultiValued,
Class componentType,
boolean isHidden,
List<String> validOptions) {
this.ioType = getIOType(annotation);
this.ioType = ArgumentIOType.getIOType(annotation);
this.argumentType = argumentType;
this.fullName = getFullName(annotation, fieldName);
this.shortName = getShortName(annotation);
this.doc = getDoc(annotation);
this.required = isRequired(annotation, isFlag);
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = getExclusiveOf(annotation);
this.validation = getValidationRegex(annotation);
@ -222,17 +250,6 @@ public class ArgumentDefinition {
Utils.equals(shortName,other.shortName);
}
/**
* Returns the ArgumentIOType for the annotation.
* @param annotation @Input or @Output
* @return ArgumentIOType.Input, Output, or Unknown
*/
public static ArgumentIOType getIOType(Annotation annotation) {
if (annotation instanceof Input) return ArgumentIOType.INPUT;
if (annotation instanceof Output) return ArgumentIOType.OUTPUT;
return ArgumentIOType.UNKNOWN;
}
/**
* A hack to get around the fact that Java doesn't like inheritance in Annotations.
* @param annotation to run the method on

View File

@ -24,6 +24,28 @@
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.StingException;
import java.lang.annotation.Annotation;
public enum ArgumentIOType {
INPUT, OUTPUT, UNKNOWN
INPUT(Input.class), OUTPUT(Output.class), ARGUMENT(Argument.class);
public final Class<? extends Annotation> annotationClass;
ArgumentIOType(Class<? extends Annotation> annotationClass) {
this.annotationClass = annotationClass;
}
/**
* Returns the ArgumentIOType for the annotation.
* @param annotation @Input, @Output, or @Argument
* @return The matching ArgumentIOType; throws StingException for an unknown annotation type
*/
public static ArgumentIOType getIOType(Annotation annotation) {
for (ArgumentIOType ioType: ArgumentIOType.values())
if (ioType.annotationClass.isAssignableFrom(annotation.getClass()))
return ioType;
throw new StingException("Unknown annotation type: " + annotation);
}
}

View File

@ -0,0 +1,225 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.util.*;
/**
* A mapping of all the sites where an argument definition maps to a site on the command line.
*/
public class ArgumentMatch implements Iterable<ArgumentMatch> {
/**
* The argument definition that's been matched.
*/
public final ArgumentDefinition definition;
/**
* The text that's been matched, as it appears in the command line arguments.
*/
public final String label;
/**
* Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
/**
* Create a new argument match, defining its properties later. Used to create invalid arguments.
*/
public ArgumentMatch() {
this.label = null;
this.definition = null;
}
/**
* A simple way of indicating that an argument with the given label and definition exists at this index.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null.
*/
public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
this( label, definition, index, null );
}
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
this.label = label;
this.definition = definition;
ArrayList<String> values = new ArrayList<String>();
if( value != null )
values.add(value);
indices.put(index,values );
}
/**
* Return a string representation of the given argument match, for debugging purposes.
* @return String representation of the match.
*/
public String toString() {
return label;
}
/**
* Creates an iterator that walks over each individual match at each position of a given argument.
* @return An iterator over the individual matches in this argument. Will not be null.
*/
public Iterator<ArgumentMatch> iterator() {
return new Iterator<ArgumentMatch>() {
/**
* Iterate over each available index.
*/
private Iterator<Integer> indexIterator = null;
/**
* Iterate over each available token.
*/
private Iterator<String> tokenIterator = null;
/**
* The next index to return. Null if none remain.
*/
Integer nextIndex = null;
/**
* The next token to return. Null if none remain.
*/
String nextToken = null;
{
indexIterator = indices.keySet().iterator();
prepareNext();
}
/**
* Is there a nextToken available to return?
* @return True if there's another token waiting in the wings. False otherwise.
*/
public boolean hasNext() {
return nextToken != null;
}
/**
* Get the next token, if one exists. If not, throw an IllegalStateException.
* @return The next ArgumentMatch in the series. Should never be null.
*/
public ArgumentMatch next() {
if( nextIndex == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
prepareNext();
return match;
}
/**
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
* initialize nextIndex / nextToken to null.
*/
private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
}
else {
nextIndex = null;
nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data.
while( indexIterator.hasNext() ) {
nextIndex = indexIterator.next();
if( indices.get(nextIndex) != null ) {
tokenIterator = indices.get(nextIndex).iterator();
if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
break;
}
}
}
}
}
/**
* Remove is unsupported in this context.
*/
public void remove() {
throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
}
};
}
/**
* Merge two ArgumentMatches, so that the values for all arguments go into the
* same data structure.
* @param other The other match to merge into.
*/
public void mergeInto( ArgumentMatch other ) {
indices.putAll(other.indices);
}
/**
* Associate a value with this merge maapping.
* @param index index of the command-line argument to which this value is mated.
* @param value Text representation of value to add.
*/
public void addValue( int index, String value ) {
if( !indices.containsKey(index) || indices.get(index) == null )
indices.put(index, new ArrayList<String>() );
indices.get(index).add(value);
}
/**
* Does this argument already have a value at the given site?
* Arguments are only allowed to be single-valued per site, and
* flags aren't allowed a value at all.
* @param index Index at which to check for values.
* @return True if the argument has a value at the given site. False otherwise.
*/
public boolean hasValueAtSite( int index ) {
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
}
/**
* Return the values associated with this argument match.
* @return A collection of the string representation of these value.
*/
public List<String> values() {
List<String> values = new ArrayList<String>();
for( int index: indices.keySet() ) {
if( indices.get(index) != null )
values.addAll(indices.get(index));
}
return values;
}
/**
* Convenience method returning true if the definition is a flag.
* @return True if definition is known to be a flag; false if not known to be a flag.
*/
private boolean isArgumentFlag() {
return definition != null && definition.isFlag;
}
}

View File

@ -192,200 +192,3 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
return new HashSet<ArgumentMatch>( argumentMatches.values() );
}
}
/**
 * A mapping of all the sites where an argument definition maps to a site on the command line.
 * Iterating over an ArgumentMatch yields one single-valued ArgumentMatch per (position, value) pair.
 */
class ArgumentMatch implements Iterable<ArgumentMatch> {
/**
 * The argument definition that's been matched.
 */
public final ArgumentDefinition definition;
/**
 * The text that's been matched, as it appears in the command line arguments.
 */
public final String label;
/**
 * Maps indices of command line arguments to values paired with that argument.
 */
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
/**
 * Create a new argument match, defining its properties later. Used to create invalid arguments.
 */
public ArgumentMatch() {
this.label = null;
this.definition = null;
}
/**
 * A simple way of indicating that an argument with the given label and definition exists at this index.
 * @param label Label of the argument match. Must not be null.
 * @param definition The associated definition, if one exists. May be null.
 * @param index Position of the argument. Must not be null.
 */
public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
this( label, definition, index, null );
}
// Internal constructor binding a single (index, value) pair; used by the iterator below.
private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
this.label = label;
this.definition = definition;
ArrayList<String> values = new ArrayList<String>();
if( value != null )
values.add(value);
indices.put(index,values );
}
/**
 * Return a string representation of the given argument match, for debugging purposes.
 * @return String representation of the match.
 */
public String toString() {
return label;
}
/**
 * Creates an iterator that walks over each individual match at each position of a given argument.
 * @return An iterator over the individual matches in this argument. Will not be null.
 */
public Iterator<ArgumentMatch> iterator() {
return new Iterator<ArgumentMatch>() {
/**
 * Iterate over each available index.
 */
private Iterator<Integer> indexIterator = null;
/**
 * Iterate over each available token.
 */
private Iterator<String> tokenIterator = null;
/**
 * The next index to return. Null if none remain.
 */
Integer nextIndex = null;
/**
 * The next token to return. Null if none remain.
 */
String nextToken = null;
// Instance initializer: prime the first (index, token) pair, if any.
{
indexIterator = indices.keySet().iterator();
prepareNext();
}
/**
 * Is there a nextToken available to return?
 * @return True if there's another token waiting in the wings. False otherwise.
 */
public boolean hasNext() {
return nextToken != null;
}
/**
 * Get the next token, if one exists. If not, throw an IllegalStateException.
 * @return The next ArgumentMatch in the series. Should never be null.
 */
public ArgumentMatch next() {
if( nextIndex == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
prepareNext();
return match;
}
/**
 * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
 * initialize nextIndex / nextToken to null.
 */
private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
}
else {
nextIndex = null;
nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data.
while( indexIterator.hasNext() ) {
nextIndex = indexIterator.next();
if( indices.get(nextIndex) != null ) {
tokenIterator = indices.get(nextIndex).iterator();
if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
break;
}
}
}
}
}
/**
 * Remove is unsupported in this context.
 */
public void remove() {
throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
}
};
}
/**
 * Merge two ArgumentMatches, so that the values for all arguments go into the
 * same data structure.
 * @param other The other match to merge into.
 */
public void mergeInto( ArgumentMatch other ) {
indices.putAll(other.indices);
}
/**
 * Associate a value with this merge mapping.
 * @param index index of the command-line argument to which this value is mated.
 * @param value Text representation of value to add.
 */
public void addValue( int index, String value ) {
if( !indices.containsKey(index) || indices.get(index) == null )
indices.put(index, new ArrayList<String>() );
indices.get(index).add(value);
}
/**
 * Does this argument already have a value at the given site?
 * Arguments are only allowed to be single-valued per site, and
 * flags aren't allowed a value at all.
 * @param index Index at which to check for values.
 * @return True if the argument has a value at the given site. False otherwise.
 */
public boolean hasValueAtSite( int index ) {
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
}
/**
 * Return the values associated with this argument match, in command-line position order.
 * @return A collection of the string representation of these values.
 */
public List<String> values() {
List<String> values = new ArrayList<String>();
for( int index: indices.keySet() ) {
if( indices.get(index) != null )
values.addAll(indices.get(index));
}
return values;
}
/**
 * Convenience method returning true if the definition is a flag.
 * @return True if definition is known to be a flag; false if not known to be a flag.
 */
private boolean isArgumentFlag() {
return definition != null && definition.isFlag;
}
}

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.gatk.walkers.Hidden;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.Arrays;
import java.util.List;
/**
@ -41,9 +41,9 @@ import java.util.List;
*/
public class ArgumentSource {
/**
* Class to which the field belongs.
* Field into which to inject command-line arguments.
*/
public final Class clazz;
public final Field[] parentFields;
/**
* Field into which to inject command-line arguments.
@ -57,11 +57,19 @@ public class ArgumentSource {
/**
* Create a new command-line argument target.
* @param clazz Class containing the argument.
* @param field Field containing the argument. Field must be annotated with 'Argument'.
* @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'.
*/
public ArgumentSource( Class clazz, Field field ) {
this.clazz = clazz;
public ArgumentSource( Field field ) {
this(new Field[0], field);
}
/**
* Create a new command-line argument target.
* @param parentFields Parent fields containing the the field. Field must be annotated with 'ArgumentCollection'.
* @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'.
*/
public ArgumentSource( Field[] parentFields, Field field ) {
this.parentFields = parentFields;
this.field = field;
this.typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
}
@ -80,7 +88,7 @@ public class ArgumentSource {
return false;
ArgumentSource otherArgumentSource = (ArgumentSource)other;
return this.clazz.equals(otherArgumentSource.clazz) && this.field.equals(otherArgumentSource.field);
return this.field == otherArgumentSource.field && Arrays.equals(this.parentFields, otherArgumentSource.parentFields);
}
/**
@ -89,7 +97,7 @@ public class ArgumentSource {
*/
@Override
public int hashCode() {
return clazz.hashCode() ^ field.hashCode();
return field.hashCode();
}
/**
@ -118,18 +126,11 @@ public class ArgumentSource {
/**
* Parses the specified value based on the specified type.
* @param source The type of value to be parsed.
* @param values String representation of all values passed.
* @return the parsed value of the object.
*/
public Object parse( ArgumentSource source, ArgumentMatches values ) {
Object value = null;
if( !isFlag() )
value = typeDescriptor.parse( source, values );
else
value = true;
return value;
public Object parse( ArgumentMatches values ) {
return typeDescriptor.parse( this, values );
}
/**
@ -145,8 +146,7 @@ public class ArgumentSource {
* @return True if the argument supports multiple values.
*/
public boolean isMultiValued() {
Class argumentType = field.getType();
return Collection.class.isAssignableFrom(argumentType) || field.getType().isArray();
return typeDescriptor.isMultiValued( this );
}
/**
@ -162,6 +162,6 @@ public class ArgumentSource {
* @return String representation of the argument source.
*/
public String toString() {
return clazz.getSimpleName() + ": " + field.getName();
return field.getDeclaringClass().getSimpleName() + ": " + field.getName();
}
}

View File

@ -113,10 +113,26 @@ public abstract class ArgumentTypeDescriptor {
return Collections.singletonList(createDefaultArgumentDefinition(source));
}
/**
* Parses an argument source to an object.
* @param source The source used to find the matches.
* @param matches The matches for the source.
* @return The parsed object.
*/
public Object parse( ArgumentSource source, ArgumentMatches matches ) {
return parse( source, source.field.getType(), matches );
}
/**
* Returns true if the field is a collection or an array.
* @param source The argument source to check.
* @return true if the field is a collection or an array.
*/
public boolean isMultiValued( ArgumentSource source ) {
Class argumentType = source.field.getType();
return Collection.class.isAssignableFrom(argumentType) || argumentType.isArray();
}
/**
* By default, argument sources create argument definitions with a set of default values.
* Use this method to create the one simple argument definition.
@ -125,15 +141,41 @@ public abstract class ArgumentTypeDescriptor {
*/
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
return new ArgumentDefinition( getArgumentAnnotation(source),
source.field.getType(),
source.field.getName(),
source.isFlag(),
source.isMultiValued(),
getCollectionComponentType(source.field),
source.isHidden(),
getValidOptions(source) );
}
public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches );
/**
* Return the component type of a field, or String.class if the type cannot be found.
* @param field The reflected field to inspect.
* @return The parameterized component type, or String.class if the parameterized type could not be found.
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
protected Class getCollectionComponentType( Field field ) {
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
return (Class)parameterizedType.getActualTypeArguments()[0];
}
else
return String.class;
}
/**
* Parses the argument matches for a class type into an object.
* @param source The original argument source used to find the matches.
* @param type The current class type being inspected. May not match the argument source.field.getType() if this as a collection for example.
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type.
*/
public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches );
/**
* If the argument source only accepts a small set of options, populate the returned list with
@ -193,6 +235,11 @@ public abstract class ArgumentTypeDescriptor {
throw new StingException("ArgumentAnnotation is not present for the argument field: " + source.field.getName());
}
/**
* Returns true if an argument annotation is present
* @param field The field to check for an annotation.
* @return True if an argument annotation is present on the field.
*/
@SuppressWarnings("unchecked")
public static boolean isArgumentAnnotationPresent(Field field) {
for (Class annotation: ARGUMENT_ANNOTATIONS)
@ -235,6 +282,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) {
if (source.isFlag())
return true;
String value = getArgumentValue( createDefaultArgumentDefinition(source), matches );
// lets go through the types we support
@ -301,7 +350,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public boolean supports( Class type ) {
return ( Collection.class.isAssignableFrom(type) || type.isArray() );
}
@Override
@SuppressWarnings("unchecked")
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches )
@ -319,16 +368,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
else if( java.util.Set.class.isAssignableFrom(type) ) type = java.util.TreeSet.class;
}
// If this is a parameterized collection, find the contained type. If blow up if only one type exists.
if( source.field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString());
componentType = (Class)parameterizedType.getActualTypeArguments()[0];
}
else
componentType = String.class;
componentType = getCollectionComponentType( source.field );
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
Collection collection;

View File

@ -43,11 +43,11 @@ public abstract class CommandLineProgram {
private static Logger logger = Logger.getRootLogger();
/** the default log level */
@Input(fullName = "logging_level",
@Argument(fullName = "logging_level",
shortName = "l",
doc = "Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging.",
required = false)
protected String logging_level = "WARN";
protected String logging_level = "INFO";
/** where to send the output of our logger */
@ -58,21 +58,21 @@ public abstract class CommandLineProgram {
protected String toFile = null;
/** do we want to silence the command line output */
@Input(fullName = "quiet_output_mode",
@Argument(fullName = "quiet_output_mode",
shortName = "quiet",
doc = "Set the logging to quiet mode, no output to stdout",
required = false)
protected Boolean quietMode = false;
/** do we want to generate debugging information with the logs */
@Input(fullName = "debug_mode",
@Argument(fullName = "debug_mode",
shortName = "debug",
doc = "Set the logging file string to include a lot of debugging information (SLOW!)",
required = false)
protected Boolean debugMode = false;
/** this is used to indicate if they've asked for help */
@Input(fullName = "help", shortName = "h", doc = "Generate this help message", required = false)
@Argument(fullName = "help", shortName = "h", doc = "Generate this help message", required = false)
public Boolean help = false;
/** our logging output patterns */
@ -146,6 +146,7 @@ public abstract class CommandLineProgram {
* @param clp the command line program to execute
* @param args the command line arguments passed in
*/
@SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) {
try {
@ -174,14 +175,14 @@ public abstract class CommandLineProgram {
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parser.parse(args);
if (isHelpPresent(clp, parser))
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
} else {
parser.parse(args);
if (isHelpPresent(clp, parser))
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
@ -216,7 +217,7 @@ public abstract class CommandLineProgram {
// if they specify a log location, output our data there
if (clp.toFile != null) {
FileAppender appender = null;
FileAppender appender;
try {
appender = new FileAppender(layout, clp.toFile, false);
logger.addAppender(appender);
@ -258,7 +259,7 @@ public abstract class CommandLineProgram {
*/
private static void toErrorLog(CommandLineProgram clp, Exception e) {
File logFile = new File("GATK_Error.log");
PrintStream stream = null;
PrintStream stream;
try {
stream = new PrintStream(logFile);
} catch (Exception e1) { // catch all the exceptions here, if we can't create the file, do the alternate path
@ -279,22 +280,12 @@ public abstract class CommandLineProgram {
parser.loadArgumentsIntoObject(obj);
}
/**
* a manual way to load argument providing objects into the program
*
* @param clp the command line program
* @param cls the class to load the arguments off of
*/
public void loadAdditionalSource(CommandLineProgram clp, Class cls) {
parser.addArgumentSource(clp.getArgumentSourceName(cls), cls);
}
/**
* this function checks the logger level passed in on the command line, taking the lowest
* level that was provided.
*/
private void setupLoggerLevel() {
Level par = Level.WARN;
Level par;
if (logging_level.toUpperCase().equals("DEBUG")) {
par = Level.DEBUG;
} else if (logging_level.toUpperCase().equals("ERROR")) {
@ -316,9 +307,9 @@ public abstract class CommandLineProgram {
}
/**
* a function used to indicate an error occured in the command line tool
* a function used to indicate an error occurred in the command line tool
*
* @param msg
* @param msg message to display
*/
private static void printExitSystemMsg(final String msg) {
System.out.printf("The following error has occurred:%n%n");
@ -334,12 +325,11 @@ public abstract class CommandLineProgram {
/**
* Do a cursory search for the given argument.
*
* @param clp Instance of the command-line program.
* @param parser Parser
*
* @return True if help is present; false otherwise.
*/
private static boolean isHelpPresent(CommandLineProgram clp, ParsingEngine parser) {
private static boolean isHelpPresent(ParsingEngine parser) {
return parser.isArgumentPresent("help");
}

View File

@ -270,26 +270,38 @@ public class ParsingEngine {
return;
// Target instance into which to inject the value.
List<Object> targets = new ArrayList<Object>();
// Check to see whether the instance itself can be the target.
if( source.clazz.isAssignableFrom(instance.getClass()) ) {
targets.add(instance);
}
// Check to see whether a contained class can be the target.
targets.addAll(getContainersMatching(instance,source.clazz));
Collection<Object> targets = findTargets( source, instance );
// Abort if no home is found for the object.
if( targets.size() == 0 )
throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches);
for( Object target: targets ) {
Object value = (argumentMatches.size() != 0) ? source.parse(source,argumentMatches) : source.getDefault();
Object value = (argumentMatches.size() != 0) ? source.parse(argumentMatches) : source.getDefault();
JVMUtils.setFieldValue(source.field,target,value);
}
}
/**
* Gets a collection of the container instances of the given type stored within the given target.
* @param source Argument source.
* @param instance Container.
* @return A collection of containers matching the given argument source.
*/
private Collection<Object> findTargets(ArgumentSource source, Object instance) {
LinkedHashSet<Object> targets = new LinkedHashSet<Object>();
for( Class clazz = instance.getClass(); clazz != null; clazz = clazz.getSuperclass() ) {
for( Field field: clazz.getDeclaredFields() ) {
if( field.equals(source.field) ) {
targets.add(instance);
} else if( field.isAnnotationPresent(ArgumentCollection.class) ) {
targets.addAll(findTargets(source, JVMUtils.getFieldValue(field, instance)));
}
}
}
return targets;
}
/**
* Prints out the help associated with these command-line argument definitions.
* @param applicationDetails Details about the specific GATK-based application being run.
@ -303,15 +315,22 @@ public class ParsingEngine {
* @param sourceClass class to act as sources for other arguments.
* @return A list of sources associated with this object and its aggregated objects.
*/
protected static List<ArgumentSource> extractArgumentSources(Class sourceClass) {
public static List<ArgumentSource> extractArgumentSources(Class sourceClass) {
return extractArgumentSources(sourceClass, new Field[0]);
}
private static List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
List<ArgumentSource> argumentSources = new ArrayList<ArgumentSource>();
while( sourceClass != null ) {
Field[] fields = sourceClass.getDeclaredFields();
for( Field field: fields ) {
if( ArgumentTypeDescriptor.isArgumentAnnotationPresent(field) )
argumentSources.add( new ArgumentSource(sourceClass,field) );
if( field.isAnnotationPresent(ArgumentCollection.class) )
argumentSources.addAll( extractArgumentSources(field.getType()) );
argumentSources.add( new ArgumentSource(parentFields, field) );
if( field.isAnnotationPresent(ArgumentCollection.class) ) {
Field[] newParentFields = Arrays.copyOf(parentFields, parentFields.length + 1);
newParentFields[parentFields.length] = field;
argumentSources.addAll( extractArgumentSources(field.getType(), newParentFields) );
}
}
sourceClass = sourceClass.getSuperclass();
}
@ -350,24 +369,6 @@ public class ParsingEngine {
// No parse results found.
return null;
}
/**
* Gets a list of the container instances of the given type stored within the given target.
* @param target Class holding the container.
* @param type Container type.
* @return A list of containers matching the given type.
*/
private List<Object> getContainersMatching(Object target, Class<?> type) {
List<Object> containers = new ArrayList<Object>();
Field[] fields = target.getClass().getDeclaredFields();
for( Field field: fields ) {
if( field.isAnnotationPresent(ArgumentCollection.class) && type.isAssignableFrom(field.getType()) )
containers.add(JVMUtils.getFieldValue(field,target));
}
return containers;
}
}
/**

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.GATKErrorReport;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.commandline.*;
@ -135,7 +134,7 @@ public class CommandLineGATK extends CommandLineExecutable {
* @return A string summarizing the walkers available in this distribution.
*/
private String getAdditionalHelp() {
String additionalHelp = "";
String additionalHelp;
// If no analysis name is present, fill in extra help on the walkers.
WalkerManager walkerManager = GATKEngine.getWalkerManager();
@ -152,7 +151,7 @@ public class CommandLineGATK extends CommandLineExecutable {
private static final int WALKER_INDENT = 3;
private static final String FIELD_SEPARATOR = " ";
private String getWalkerHelp(Class<Walker> walkerType) {
private String getWalkerHelp(Class<? extends Walker> walkerType) {
// Construct a help string to output details on this walker.
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);

View File

@ -40,19 +40,10 @@ import org.broadinstitute.sting.utils.help.SummaryTaglet;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Mar 17, 2009
* Time: 3:14:28 PM
* To change this template use File | Settings | File Templates.
* Plugin manager that also provides various utilities for inspecting Walkers.
*/
public class WalkerManager extends PluginManager<Walker> {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(WalkerManager.class);
/**
* A collection of help text for walkers and their enclosing packages.
*/
@ -92,7 +83,7 @@ public class WalkerManager extends PluginManager<Walker> {
public String getPackageDisplayName(String packageName) {
// Try to find an override for the display name of this package.
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
String displayName = null;
String displayName;
if(helpText.containsKey(displayNameKey)) {
displayName = helpText.getString(displayNameKey);
}
@ -130,6 +121,15 @@ public class WalkerManager extends PluginManager<Walker> {
return helpText.getString(walkerSummary);
}
/**
* Gets the summary help text associated with a given walker type.
* @param walker Walker for which to search for help text.
* @return Walker summary description, or "" if none exists.
*/
public String getWalkerSummaryText(Walker walker) {
return getWalkerSummaryText(walker.getClass());
}
/**
* Gets the descriptive help text associated with a given walker type.
* @param walkerType Type of walker for which to search for help text.
@ -142,13 +142,34 @@ public class WalkerManager extends PluginManager<Walker> {
return helpText.getString(walkerDescription);
}
/**
* Gets the descriptive help text associated with a given walker type.
* @param walker Walker for which to search for help text.
* @return Walker full description, or "" if none exists.
*/
public String getWalkerDescriptionText(Walker walker) {
return getWalkerDescriptionText(walker.getClass());
}
/**
* Retrieves the walker class given a walker name.
* @param walkerName Name of the walker.
* @return Class representing the walker.
*/
public Class<Walker> getWalkerClassByName(String walkerName) {
return (Class<Walker>)pluginsByName.get(walkerName);
public Class<? extends Walker> getWalkerClassByName(String walkerName) {
return pluginsByName.get(walkerName);
}
/**
* Gets the data source for the provided walker.
* @param walkerClass The class of the walker.
* @return Which type of data source to traverse over...reads or reference?
*/
public static DataSource getWalkerDataSource(Class<? extends Walker> walkerClass) {
By byDataSource = walkerClass.getAnnotation(By.class);
if( byDataSource == null )
throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
return byDataSource.value();
}
/**
@ -157,21 +178,38 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Which type of data source to traverse over...reads or reference?
*/
public static DataSource getWalkerDataSource(Walker walker) {
Class<? extends Walker> walkerClass = walker.getClass();
By byDataSource = walkerClass.getAnnotation(By.class);
if( byDataSource == null )
throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
return byDataSource.value();
return getWalkerDataSource(walker.getClass());
}
/**
* Get a list of RODs allowed by the walker.
* @param walkerClass Class of the walker to query.
* @return The list of allowed reference meta data.
*/
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
Allows allowsDataSource = getWalkerAllowed(walkerClass);
if (allowsDataSource == null)
return Collections.<RMD>emptyList();
return Arrays.asList(allowsDataSource.referenceMetaData());
}
/**
* Get a list of RODs allowed by the walker.
* @param walker Walker to query.
* @return The list of allowed reference meta data.
*/
public static List<RMD> getAllowsMetaData(Walker walker) {
return getAllowsMetaData(walker.getClass());
}
/**
* Determine whether the given walker supports the given data source.
* @param walker Walker to query.
* @param walkerClass Class of the walker to query.
* @param dataSource Source to check for .
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Walker walker, DataSource dataSource) {
Allows allowsDataSource = getWalkerAllowed(walker);
public static boolean isAllowed(Class<? extends Walker> walkerClass, DataSource dataSource) {
Allows allowsDataSource = getWalkerAllowed(walkerClass);
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
@ -182,13 +220,23 @@ public class WalkerManager extends PluginManager<Walker> {
}
/**
* Determine whether the given walker supports the given reference ordered data.
* Determine whether the given walker supports the given data source.
* @param walker Walker to query.
* @param dataSource Source to check for .
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Walker walker, DataSource dataSource) {
return isAllowed(walker.getClass(), dataSource);
}
/**
* Determine whether the given walker supports the given reference ordered data.
* @param walkerClass Class of the walker to query.
* @param rod Source to check.
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Walker walker, RMDTrack rod) {
Allows allowsDataSource = getWalkerAllowed(walker);
public static boolean isAllowed(Class<? extends Walker> walkerClass, RMDTrack rod) {
Allows allowsDataSource = getWalkerAllowed(walkerClass);
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
@ -208,6 +256,27 @@ public class WalkerManager extends PluginManager<Walker> {
return false;
}
/**
* Determine whether the given walker supports the given reference ordered data.
* @param walker Walker to query.
* @param rod Source to check.
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Walker walker, RMDTrack rod) {
return isAllowed(walker.getClass(), rod);
}
/**
* Determine whether the given walker requires the given data source.
* @param walkerClass Class of the walker to query.
* @param dataSource Source to check for.
* @return True if the walker allows this data type. False otherwise.
*/
public static boolean isRequired(Class<? extends Walker> walkerClass, DataSource dataSource) {
Requires requiresDataSource = getWalkerRequirements(walkerClass);
return Arrays.asList(requiresDataSource.value()).contains(dataSource);
}
/**
* Determine whether the given walker requires the given data source.
* @param walker Walker to query.
@ -215,18 +284,26 @@ public class WalkerManager extends PluginManager<Walker> {
* @return True if the walker allows this data type. False otherwise.
*/
public static boolean isRequired(Walker walker, DataSource dataSource) {
Requires requiresDataSource = getWalkerRequirements(walker);
return Arrays.asList(requiresDataSource.value()).contains(dataSource);
return isRequired(walker.getClass(), dataSource);
}
/**
* Get a list of RODs required by the walker.
* @param walkerClass Class of the walker to query.
* @return The list of required reference meta data.
*/
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
Requires requiresDataSource = getWalkerRequirements(walkerClass);
return Arrays.asList(requiresDataSource.referenceMetaData());
}
/**
* Get a list of RODs required by the walker.
* @param walker Walker to query.
* @return True if the walker allows this data type. False otherwise.
* @return The list of required reference meta data.
*/
public static List<RMD> getRequiredMetaData(Walker walker) {
Requires requiresDataSource = getWalkerRequirements(walker);
return Arrays.asList(requiresDataSource.referenceMetaData());
return getRequiredMetaData(walker.getClass());
}
/**
@ -238,6 +315,19 @@ public class WalkerManager extends PluginManager<Walker> {
return walkerType.isAnnotationPresent(Hidden.class);
}
/**
* Extracts filters that the walker has requested be run on the dataset.
* @param walkerClass Class of the walker to inspect for filtering requests.
* @param filterManager Manages the creation of filters.
* @return A non-empty list of filters to apply to the reads.
*/
public static List<SamRecordFilter> getReadFilters(Class<? extends Walker> walkerClass, FilterManager filterManager) {
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
for(Class<? extends SamRecordFilter> filterType: getReadFilterTypes(walkerClass))
filters.add(filterManager.createFilterByType(filterType));
return filters;
}
/**
* Extracts filters that the walker has requested be run on the dataset.
* @param walker Walker to inspect for filtering requests.
@ -245,10 +335,28 @@ public class WalkerManager extends PluginManager<Walker> {
* @return A non-empty list of filters to apply to the reads.
*/
public static List<SamRecordFilter> getReadFilters(Walker walker, FilterManager filterManager) {
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
for(Class<? extends SamRecordFilter> filterType: getReadFilterTypes(walker))
filters.add(filterManager.createFilterByType(filterType));
return filters;
return getReadFilters(walker.getClass(), filterManager);
}
/**
* Gets the type of downsampling method requested by the walker. If an alternative
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walkerClass The class of the walker to interrogate.
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass) {
DownsamplingMethod downsamplingMethod = null;
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
Downsample downsampleParameters = walkerClass.getAnnotation(Downsample.class);
DownsampleType type = downsampleParameters.by();
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
}
return downsamplingMethod;
}
/**
@ -259,17 +367,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Walker walker) {
DownsamplingMethod downsamplingMethod = null;
if( walker.getClass().isAnnotationPresent(Downsample.class) ) {
Downsample downsampleParameters = walker.getClass().getAnnotation(Downsample.class);
DownsampleType type = downsampleParameters.by();
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
}
return downsamplingMethod;
return getDownsamplingMethod(walker.getClass());
}
/**
@ -293,26 +391,55 @@ public class WalkerManager extends PluginManager<Walker> {
/**
* Utility to get the requires attribute from the walker.
* Throws an exception if requirements are missing.
* @param walker Walker to query for required data.
* @param walkerClass Class of the walker to query for required data.
* @return Required data attribute.
*/
private static Requires getWalkerRequirements(Walker walker) {
Class<? extends Walker> walkerClass = walker.getClass();
private static Requires getWalkerRequirements(Class<? extends Walker> walkerClass) {
Requires requiresDataSource = walkerClass.getAnnotation(Requires.class);
if( requiresDataSource == null )
throw new StingException( "Unable to find data types required by walker class " + walkerClass.getName());
return requiresDataSource;
}
/**
* Utility to get the requires attribute from the walker.
* Throws an exception if requirements are missing.
* @param walker Walker to query for required data.
* @return Required data attribute.
*/
private static Requires getWalkerRequirements(Walker walker) {
return getWalkerRequirements(walker.getClass());
}
/**
* Utility to get the forbidden attribute from the walker.
* @param walkerClass Class of the walker to query for required data.
* @return Required data attribute. Null if forbidden info isn't present.
*/
private static Allows getWalkerAllowed(Class<? extends Walker> walkerClass) {
Allows allowsDataSource = walkerClass.getAnnotation(Allows.class);
return allowsDataSource;
}
/**
* Utility to get the forbidden attribute from the walker.
* @param walker Walker to query for required data.
* @return Required data attribute. Null if forbidden info isn't present.
*/
private static Allows getWalkerAllowed(Walker walker) {
Class<? extends Walker> walkerClass = walker.getClass();
Allows allowsDataSource = walkerClass.getAnnotation(Allows.class);
return allowsDataSource;
return getWalkerAllowed(walker.getClass());
}
/**
* Gets the list of filtering classes specified as walker annotations.
* @param walkerClass Class of the walker to inspect.
* @return An array of types extending from SamRecordFilter. Will never be null.
*/
@SuppressWarnings("unchecked")
public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Class<? extends Walker> walkerClass) {
if( !walkerClass.isAnnotationPresent(ReadFilters.class) )
return new Class[0];
return walkerClass.getAnnotation(ReadFilters.class).value();
}
/**
@ -320,10 +447,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @param walker The walker to inspect.
* @return An array of types extending from SamRecordFilter. Will never be null.
*/
private static Class<? extends SamRecordFilter>[] getReadFilterTypes(Walker walker) {
Class<? extends Walker> walkerClass = walker.getClass();
if( !walkerClass.isAnnotationPresent(ReadFilters.class) )
return new Class[0];
return walkerClass.getAnnotation(ReadFilters.class).value();
public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Walker walker) {
return getReadFilterTypes(walker.getClass());
}
}

View File

@ -29,6 +29,8 @@ import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
@ -64,7 +66,7 @@ public class GATKArgumentCollection {
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<File> samFiles = new ArrayList<File>();
@Element(required = false)
@ -76,19 +78,19 @@ public class GATKArgumentCollection {
public List<String> readFilters = new ArrayList<String>();
@ElementList(required = false)
@Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> intervals = null;
@ElementList(required = false)
@Argument(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> excludeIntervals = null;
@Element(required = false)
@Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@ElementList(required = false)
@Argument(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form <name>,<type>,<file>", required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form <name>,<type>,<file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>();
@Element(required = false)
@ -100,30 +102,30 @@ public class GATKArgumentCollection {
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
public String DBSNPFile = null;
@Element(required = false)
@Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
@Input(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
public String HAPMAPFile = null;
@Element(required = false)
@Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
@Input(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
public String HAPMAPChipFile = null;
/** An output file presented to the walker. */
@Element(required = false)
@Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
@Output(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String outFileName = null;
/** An error output file presented to the walker. */
@Element(required = false)
@Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
@Output(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String errFileName = null;
/** A joint file for both 'normal' and error output presented to the walker. */
@Element(required = false)
@Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
@Output(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
public String outErrFileName = null;
@Element(required = false)

View File

@ -30,6 +30,8 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
import net.sf.picard.filter.SamRecordFilter;
import java.util.Collection;
/**
* Manage filters and filter options. Any requests for basic filtering classes
* should ultimately be made through this class.
@ -38,11 +40,6 @@ import net.sf.picard.filter.SamRecordFilter;
* @version 0.1
*/
public class FilterManager extends PluginManager<SamRecordFilter> {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(FilterManager.class);
public FilterManager() {
super(SamRecordFilter.class,"filter","Filter");
}
@ -50,10 +47,14 @@ public class FilterManager extends PluginManager<SamRecordFilter> {
/**
* Instantiate a filter of the given type. Along the way, scream bloody murder if
* the filter is not available.
* @param filterType
* @return
* @param filterType The type of the filter
* @return The filter
*/
public SamRecordFilter createFilterByType(Class<? extends SamRecordFilter> filterType) {
return this.createByName(getName(filterType));
}
public Collection<Class<? extends SamRecordFilter>> getValues() {
return this.pluginsByName.values();
}
}

View File

@ -158,10 +158,12 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
source.field.getType(),
"variants_out",
"varout",
false,
source.isMultiValued(),
getCollectionComponentType(source.field),
source.isHidden(),
null );
}
@ -173,13 +175,15 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation),
return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
GenotypeWriterFactory.GENOTYPE_FORMAT.class,
"variant_output_format",
"vf",
"Format to be used to represent variants; default is VCF",
false,
false,
false,
null,
source.isHidden(),
null,
null,

View File

@ -97,10 +97,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
source.field.getType(),
DEFAULT_ARGUMENT_FULLNAME,
DEFAULT_ARGUMENT_SHORTNAME,
false,
source.isMultiValued(),
getCollectionComponentType(source.field),
source.isHidden(),
null );
}
@ -112,13 +114,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation),
return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
int.class,
COMPRESSION_FULLNAME,
COMPRESSION_SHORTNAME,
"Compression level to use for writing BAM files",
false,
false,
false,
null,
source.isHidden(),
null,
null,

View File

@ -1,125 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.picard.PicardException;
import net.sf.picard.sam.ReservedTagConstants;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.lang.reflect.Constructor;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
// Should replace picard class with the same name
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
private Reads sourceInfo;
private final Comparator<SAMRecord> comparator;
private final SAMFileReader reader;
private final SamFileHeaderMerger mHeaderMerger;
/**
* Constructs an iterator for iteration over the supplied SAM file that will be
* able to compare itself to other ComparableSAMRecordIterator instances using
* the supplied comparator for ordering SAMRecords.
*
* @param sam the SAM file to read records from
* @param comparator the Comparator to use to provide ordering fo SAMRecords
*/
public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, final Comparator<SAMRecord> comparator) {
super(sam.iterator());
this.reader = sam;
this.comparator = comparator;
mHeaderMerger = samHeaderMerger;
}
public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, Iterator<SAMRecord> iterator, final Comparator<SAMRecord> comparator) {
super(iterator); // use the provided iterator
this.reader = sam;
this.comparator = comparator;
mHeaderMerger = samHeaderMerger;
}
public Reads getSourceInfo() {
if (sourceInfo == null)
throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
return sourceInfo;
}
/**
* Returns the reader from which this iterator was constructed.
*
* @return the SAMFileReader
*/
public SAMFileReader getReader() {
return reader;
}
/**
* Compares this iterator to another comparable iterator based on the next record
* available in each iterator. If the two comparable iterators have different
* comparator types internally an exception is thrown.
*
* @param that another iterator to compare to
*
* @return a negative, 0 or positive number as described in the Comparator interface
*/
public int compareTo(final ComparableSamRecordIterator that) {
if (this.comparator.getClass() != that.comparator.getClass()) {
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
"have different orderings internally");
}
final SAMRecord record = this.peek();
final SAMRecord record2 = that.peek();
record.setHeader(mHeaderMerger.getMergedHeader());
record2.setHeader(mHeaderMerger.getMergedHeader());
int index, index2;
try {
index = mHeaderMerger.getMergedHeader().getSequenceIndex(record.getReferenceName());
record.setReferenceIndex(index);
index2 = mHeaderMerger.getMergedHeader().getSequenceIndex(record2.getReferenceName());
record2.setReferenceIndex(index2);
} catch (Exception e) {
throw new StingException("MergingSamRecordIterator2: unable to correct the reference index for read " + record.getReadName() + " or record " + record2.getReadName(),e);
}
return comparator.compare(record, record2);
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -31,26 +31,28 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* @author aaron
* <p/>
* Class RMDTrackManager
* <p/>
* Find the available track builders, and create the requisite tracks from the command line.
* Find the available track builders, and create the requisite tracks from the command line.
*
* In Tribble RMD tracks have two classes:
* - a Feature that is the model/view for the data
* - a Codec that is the controller to generate the Feature.
*
* In this class, the track types are the Codecs. The track record types are the Features.
*/
public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
// the input strings we use to create RODs from
List<RMDTriplet> inputs = new ArrayList<RMDTriplet>();
// create an active mapping of builder instances, and a map of the name -> class for convenience
Map<String, RMDTrackBuilder> availableTracks;
Map<String, Class> availableTrackClasses;
/** the tracks that are available to us, associated with their builder */
Map<String, RMDTrackBuilder> availableTrackBuilders;
/** the classes names, with their class description (think the Controller Codecs) */
Map<String, Class> availableTrackTypes;
/** the available track record types (think the Model/View Features) */
Map<String, Class> availableTrackRecordTypes;
/** Create a new track plugin manager. */
public RMDTrackManager() {
@ -65,28 +67,56 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
* @return a list of RMDTracks, one for each -B option
*/
public List<RMDTrack> getReferenceMetaDataSources(List<String> triplets) {
if (availableTracks == null || availableTrackClasses == null) initialize(triplets);
initializeTrackTypes();
initializeTriplets(triplets);
// try and make the tracks given their requests
return createRequestedTrackObjects(availableTracks, availableTrackClasses);
return createRequestedTrackObjects();
}
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
initializeTrackTypes();
initializeTrackRecordTypes();
Set<String> names = new TreeSet<String>();
for (Map.Entry<String, Class> availableTrackRecordType: availableTrackRecordTypes.entrySet()) {
if (trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
/**
* initialize our lists of tracks and builders
* initialize our lists of triplets
* @param triplets the input to the GATK, as a list of strings passed in through the -B options
*/
private void initialize(List<String> triplets) {
private void initializeTriplets(List<String> triplets) {
// NOTE: Method acts as a static. Once the inputs have been passed once they are locked in.
if (inputs.size() > 0 || triplets.size() == 0)
return;
for (String value: triplets) {
String[] split = value.split(",");
if (split.length != 3) throw new IllegalArgumentException(value + " is not a valid reference metadata track description");
inputs.add(new RMDTriplet(split[0], split[1], split[2]));
}
}
/**
* initialize our lists of tracks and builders
*/
private void initializeTrackTypes() {
if (availableTrackBuilders != null && availableTrackTypes != null)
return;
// create an active mapping of builder instances, and a map of the name -> class for convenience
availableTracks = new HashMap<String, RMDTrackBuilder>();
availableTrackClasses = new HashMap<String, Class>();
availableTrackBuilders = new HashMap<String, RMDTrackBuilder>();
availableTrackTypes = new HashMap<String, Class>();
createBuilderObjects();
}
/**
@ -98,8 +128,24 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
RMDTrackBuilder builder = this.createByName(builderName);
Map<String, Class> mapping = builder.getAvailableTrackNamesAndTypes();
for (String name : mapping.keySet()) {
availableTracks.put(name.toUpperCase(), builder);
availableTrackClasses.put(name.toUpperCase(), mapping.get(name));
availableTrackBuilders.put(name.toUpperCase(), builder);
availableTrackTypes.put(name.toUpperCase(), mapping.get(name));
}
}
}
/**
* initialize our list of track record types
*/
private void initializeTrackRecordTypes() {
if (availableTrackRecordTypes != null)
return;
availableTrackRecordTypes = new HashMap<String, Class>();
for (RMDTrackBuilder builder : availableTrackBuilders.values()) {
Map<String, Class> mapping = builder.getAvailableTrackNamesAndRecordTypes();
for (String name : mapping.keySet()) {
availableTrackRecordTypes.put(name.toUpperCase(), mapping.get(name));
}
}
}
@ -107,22 +153,18 @@ public class RMDTrackManager extends PluginManager<RMDTrackBuilder> {
/**
* create the requested track objects
*
* @param availableTracks the tracks that are available to us, associated with their builder
* @param availableTrackClasses the classes names, with their class description
*
* @return a list of the tracks, one for each of the requested input tracks
*/
private List<RMDTrack> createRequestedTrackObjects(Map<String, RMDTrackBuilder> availableTracks, Map<String, Class> availableTrackClasses) {
private List<RMDTrack> createRequestedTrackObjects() {
// create of live instances of the tracks
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
// create instances of each of the requested types
for (RMDTriplet trip : inputs) {
RMDTrackBuilder b = availableTracks.get(trip.getType().toUpperCase());
RMDTrackBuilder b = availableTrackBuilders.get(trip.getType().toUpperCase());
if (b == null) throw new StingException("Unable to find track for " + trip.getType());
tracks.add(b.createInstanceOfTrack(availableTrackClasses.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile())));
tracks.add(b.createInstanceOfTrack(availableTrackTypes.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile())));
}
return tracks;
}
}

View File

@ -44,6 +44,9 @@ public interface RMDTrackBuilder {
/** @return a list of all available tracks types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes();
/** @return a list of all available track record types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndRecordTypes();
/**
* create a RMDTrack of the specified type
*

View File

@ -75,15 +75,19 @@ public class RODTrackBuilder implements RMDTrackBuilder {
return new RODRMDTrack(targetClass, name, inputFile, createROD(name,targetClass,inputFile));
}
/** @return a map of all available tracks we currently have access to create */
/** @return a map of all available track types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
Map<String, Class> ret = new HashMap<String, Class>();
for (String name : Types.keySet())
ret.put(name, Types.get(name));
return ret;
return new HashMap<String, Class>(Types);
}
/**
/** @return a map of all available track record types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
return new HashMap<String, Class>(Types);
}
/**
* Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with
* <name>, of type <type> that reads its input from <file>.
*

View File

@ -35,7 +35,6 @@ import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.index.interval.IntervalIndexCreator;
import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -80,12 +79,20 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
super(FeatureCodec.class, "Codecs", "Codec");
}
/** @return a list of all available tracks we currently have access to create */
/** @return a list of all available track types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return new HashMap<String, Class>(this.pluginsByName);
}
/** @return a list of all available track record types we currently have access to create */
@Override
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
Map<String, Class> classes = new HashMap<String, Class>();
for (String c : this.pluginsByName.keySet())
classes.put(c, this.pluginsByName.get(c));
for (String name: this.pluginsByName.keySet()) {
FeatureCodec codec = this.createByName(name);
classes.put(name, codec.getFeatureType());
}
return classes;
}
@ -115,11 +122,12 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
/**
* create a feature reader of the specified type
* @param targetClass the target codec type
* @param name the target name
* @param inputFile the input file to create the track from (of the codec type)
* @return the FeatureReader instance
*/
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, String name, File inputFile) {
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = null;
Pair<BasicFeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
else
@ -133,6 +141,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* exists.
*
* @param targetClass the codec class type
* @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
@ -156,6 +165,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
/**
* create a linear feature reader, where we create the index ahead of time
* @param targetClass the target class
* @param name the name of the codec
* @param inputFile the tribble file to parse
* @return the input file as a FeatureReader
*/
@ -264,7 +274,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param indexFile the index file location
* @param lock the locking object
* @return the index object
* @throws IOException
* @throws IOException when unable to create the new index
*/
private static Index createNewIndex(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, FSLockWithShared lock) throws IOException {
Index index = createIndexInMemory(inputFile, codec);
@ -296,7 +306,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param inputFile the input file
* @param codec the codec
* @return a LinearIndex, given the file location
* @throws IOException
* @throws IOException when unable to create the index in memory
*/
private static Index createIndexInMemory(File inputFile, FeatureCodec codec) throws IOException {
// this can take a while, let them know what we're doing
@ -317,7 +327,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
* @param contigList the contig list, in coordinate order, this is allowed to be null
* @return a SAMSequenceDictionary, WITHOUT contig sizes
*/
private static final SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet<String> contigList) {
private static SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet<String> contigList) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
if (contigList == null) return dict;

View File

@ -487,7 +487,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
/**
* How should we represent a clipped bases in a read?
*/
private enum ClippingRepresentation {
public enum ClippingRepresentation {
WRITE_NS, // change the bases to Ns
WRITE_Q0S, // change the quality scores to Q0
WRITE_NS_Q0S, // change the quality scores to Q0 and write Ns

View File

@ -31,6 +31,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
@ -42,7 +43,7 @@ import org.broadinstitute.sting.commandline.Argument;
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
@Output(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
SAMFileWriter outputBamFile = null;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
String readGroup = null;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.Feature;
import java.lang.annotation.Documented;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
@ -25,5 +27,5 @@ import java.lang.annotation.RetentionPolicy;
@Retention(RetentionPolicy.RUNTIME)
public @interface RMD {
String name();
Class type();
Class type() default Feature.class;
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
@ -85,7 +86,7 @@ public class CovariateCounterWalker extends LocusWalker<CovariateCounterWalker.C
/////////////////////////////
// Command Line Arguments
/////////////////////////////
@Argument(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file")
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file")
public PrintStream RECAL_FILE;
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)

View File

@ -0,0 +1,334 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import net.sf.samtools.SAMFileWriter;
import org.broadinstitute.sting.commandline.*;
import java.io.File;
import java.lang.annotation.Annotation;
import java.util.*;
/**
 * An ArgumentField backed by a parsed GATK ArgumentDefinition.
 *
 * Each concrete subclass maps one kind of GATK command-line argument
 * (input file, output file, flag, multi-valued value, etc.) onto the
 * Scala field text and command-line snippet emitted into the generated
 * Queue CommandLineFunction wrappers.
 */
public abstract class ArgumentDefinitionField extends ArgumentField {

    // The parsed GATK argument this generated field mirrors.
    protected final ArgumentDefinition argumentDefinition;
    protected ArgumentDefinitionField(ArgumentDefinition argumentDefinition) {
        this.argumentDefinition = argumentDefinition;
    }

    // Delegate the annotation metadata straight through to the underlying definition,
    // escaping any text that will be embedded into generated Scala source.
    @Override protected String getRawFieldName() { return argumentDefinition.fullName; }
    @Override protected Class<? extends Annotation> getAnnotationIOClass() { return argumentDefinition.ioType.annotationClass; }
    @Override protected String getDoc() { return escape(argumentDefinition.doc); }
    @Override protected String getFullName() { return escape(argumentDefinition.fullName); }
    @Override protected String getShortName() { return escape(argumentDefinition.shortName); }
    @Override protected boolean isRequired() { return argumentDefinition.required; }
    @Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); }
    @Override protected String getValidation() { return escape(argumentDefinition.validation); }

    // java.util.Formatter templates for the generated Scala command-line expression:
    //   %1$s = command line parameter (e.g. "-foo"), %2$s = value format string, %3$s = scala field name.
    protected static final String REQUIRED_TEMPLATE = " + \" %1$s \" + %2$s.format(%3$s)";
    protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=%2$s)";
    protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=%2$s)";
    protected static final String FLAG_TEMPLATE = " + (if (%3$s) \" %1$s \" else \"\")";

    /** @return Scala code appending this argument to the generated command line. */
    public final String getCommandLineAddition() {
        return String.format(getCommandLineTemplate(), getCommandLineParam(), getCommandLineFormat(), getFieldName());
    }

    /** @return the command-line switch, preferring the short name ("-sn") over the full name ("--fullName"). */
    protected String getCommandLineParam() {
        return (argumentDefinition.shortName != null)
                ? "-" + argumentDefinition.shortName
                : "--" + argumentDefinition.fullName;
    }

    /** @return the default value-format string used when rendering the field on the command line. */
    protected String getCommandLineFormat() {
        return "\"%s\"";
    }

    // By default argument fields carry no @Scatter/@Gather annotation; subclasses override.
    @Override
    protected String getScatterGatherAnnotation() {
        return "";
    }

    /** @return which template to use; required arguments are always emitted, optional ones wrapped in optional(). */
    protected String getCommandLineTemplate() {
        return isRequired() ? REQUIRED_TEMPLATE : OPTIONAL_TEMPLATE;
    }

    /**
     * Extracts every argument source declared on a command-line program class
     * and converts each resulting ArgumentDefinition into generated fields.
     * @param classType the walker / command-line program class to inspect.
     * @return the generated fields for all of the class's arguments.
     */
    public static List<? extends ArgumentField> getArgumentFields(Class<?> classType) {
        List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
        for (ArgumentSource argumentSource: ParsingEngine.extractArgumentSources(classType))
            for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
                argumentFields.addAll(getArgumentFields(argumentDefinition));
        return argumentFields;
    }

    // Argument names that receive special interval handling (file + string variants, optional scattering).
    private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");

    /**
     * Maps a single ArgumentDefinition onto one or more generated fields,
     * dispatching on well-known argument names, I/O direction, and value shape.
     * @param argumentDefinition the definition to convert.
     * @return the field(s) generated for the definition.
     */
    private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition) {
        if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
            // Only "intervals" itself drives scatter/gather; the other interval inputs do not.
            boolean scatter = "intervals".equals(argumentDefinition.fullName);
            return Arrays.asList(
                    new IntervalFileArgumentField(argumentDefinition, scatter),
                    new IntervalStringArgumentField(argumentDefinition));
        // ROD Bindings are set by the RodBindField
        } else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
            // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
            return Collections.singletonList(new RodBindArgumentField(argumentDefinition, argumentDefinition.required));
            //return Collections.<ArgumentField>emptyList();
        } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
            // input_file also gets a companion field tracking the bam/sam index dependencies.
            return Arrays.asList(new InputArgumentField(argumentDefinition), new IndexFilesField());
        } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) {
            return Collections.singletonList(new InputArgumentField(argumentDefinition));
        } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) {
            return Collections.singletonList(new OutputArgumentField(argumentDefinition));
        } else if (argumentDefinition.isFlag) {
            return Collections.singletonList(new FlagArgumentField(argumentDefinition));
        } else if (argumentDefinition.isMultiValued) {
            return Collections.singletonList(new MultiValuedArgumentField(argumentDefinition));
        } else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) {
            // Optional primitives/enums become Option[...] fields; numeric types also get a format field.
            boolean useFormat = useFormatter(argumentDefinition.argumentType);
            List<ArgumentField> fields = new ArrayList<ArgumentField>();
            ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat);
            fields.add(field);
            if (useFormat) fields.add(new FormatterArgumentField(field));
            return fields;
        } else {
            // Everything else becomes a plain field; numeric types also get a format field.
            boolean useFormat = useFormatter(argumentDefinition.argumentType);
            List<ArgumentField> fields = new ArrayList<ArgumentField>();
            ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat);
            fields.add(field);
            if (useFormat) fields.add(new FormatterArgumentField(field));
            return fields;
        }
    }

    // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT)
    // Change intervals to an input file, and optionally scatter it.
    private static class IntervalFileArgumentField extends InputArgumentField {
        // Whether this interval file drives the scatter step of scatter/gather.
        private final boolean scatter;
        public IntervalFileArgumentField(ArgumentDefinition argumentDefinition, boolean scatter) {
            super(argumentDefinition);
            this.scatter = scatter;
        }

        // A scattered interval file is a single file, not a list.
        @Override protected boolean isMultiValued() { return !this.scatter && super.isMultiValued(); }
        @Override public boolean isScatter() { return this.scatter; }
        @Override protected String getScatterGatherAnnotation() {
            return scatter ? String.format("@Scatter(classOf[IntervalScatterFunction])%n") : super.getScatterGatherAnnotation();
        }

        // The file variant is mutually exclusive with its "...String" sibling field.
        @Override
        protected String getExclusiveOf() {
            StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf());
            if (exclusiveOf.length() > 0)
                exclusiveOf.append(",");
            exclusiveOf.append(escape(argumentDefinition.fullName)).append("String");
            return exclusiveOf.toString();
        }
    }

    // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT)
    // Change intervals to a string but as an argument.
    private static class IntervalStringArgumentField extends ArgumentDefinitionField {
        public IntervalStringArgumentField(ArgumentDefinition argumentDefinition) {
            super(argumentDefinition);
        }

        @SuppressWarnings("unchecked")
        @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Argument.class; }
        @Override protected Class<?> getInnerType() { return String.class; }
        // Distinguish the string variant from the file variant by a "String" suffix.
        @Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; }
        @Override protected String getFullName() { return super.getFullName() + "String"; }
        @Override protected String getFieldType() { return "List[String]"; }
        @Override protected String getDefaultValue() { return "Nil"; }
        @Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; }

        // The string variant is mutually exclusive with its file sibling field.
        @Override
        protected String getExclusiveOf() {
            StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf());
            if (exclusiveOf.length() > 0)
                exclusiveOf.append(",");
            exclusiveOf.append(escape(argumentDefinition.fullName));
            return exclusiveOf.toString();
        }
    }

    // if (argumentDefinition.ioType == ArgumentIOType.INPUT)
    // Map all inputs to files.  Handles multi valued files.
    private static class InputArgumentField extends ArgumentDefinitionField {
        public InputArgumentField(ArgumentDefinition argumentDefinition) {
            super(argumentDefinition);
        }

        @Override protected Class<?> getInnerType() { return File.class; }
        @Override protected String getFieldType() { return String.format(isMultiValued() ? "List[%s]" : "%s", getRawFieldType()); }
        @Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
        @Override protected String getCommandLineTemplate() {
            return isMultiValued() ? REPEAT_TEMPLATE : super.getCommandLineTemplate();
        }

        // Hooks for subclasses to substitute a different scala type or cardinality.
        protected String getRawFieldType() { return "File"; }
        protected boolean isMultiValued() { return argumentDefinition.isMultiValued; }
    }

    // if (argumentDefinition.ioType == ArgumentIOType.OUTPUT)
    // Map all outputs to files.
    private static class OutputArgumentField extends ArgumentDefinitionField {
        public OutputArgumentField(ArgumentDefinition argumentDefinition) {
            super(argumentDefinition);
        }

        @Override protected Class<?> getInnerType() { return File.class; }
        @Override protected String getFieldType() { return "File"; }
        @Override protected String getDefaultValue() { return "_"; }

        // Outputs are gathered after a scatter: BAM outputs via BamGatherFunction, everything else as text.
        @Override public boolean isGather() { return true; }
        @Override protected String getScatterGatherAnnotation() {
            return String.format(SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)
                    ? "@Gather(classOf[BamGatherFunction])%n"
                    : "@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])%n");
        }
    }

    // if (argumentDefinition.isFlag)
    // Booleans should be set on the commandline only if they are true.
    private static class FlagArgumentField extends ArgumentDefinitionField {
        public FlagArgumentField(ArgumentDefinition argumentDefinition) {
            super(argumentDefinition);
        }

        @Override protected Class<?> getInnerType() { return boolean.class; }
        @Override protected String getFieldType() { return "Boolean"; }
        @Override protected String getDefaultValue() { return "_"; }
        @Override protected String getCommandLineTemplate() { return FLAG_TEMPLATE; }
    }

    // if (argumentDefinition.isMultiValued)
    // Multi value arguments are mapped to List[] and use repeat.
    private static class MultiValuedArgumentField extends ArgumentDefinitionField {
        public MultiValuedArgumentField(ArgumentDefinition argumentDefinition) {
            super(argumentDefinition);
        }

        @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.componentType); }
        @Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); }
        @Override protected String getDefaultValue() { return "Nil"; }
        @Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
    }

    // if (!argumentDefinition.required && useOption(argumentDefinition.argumentType))
    // Any optional arguments that are primitives / enums are wrapped in options.
    private static class OptionedArgumentField extends ArgumentDefinitionField {
        // When true the value is rendered through a companion "...Format" field.
        private final boolean useFormatter;
        public OptionedArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) {
            super(argumentDefinition);
            this.useFormatter = useFormatter;
        }

        @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.argumentType); }
        @Override protected String getFieldType() { return String.format("Option[%s]", getType(getInnerType())); }
        @Override protected String getDefaultValue() { return "None"; }
        @Override protected String getCommandLineTemplate() { return OPTIONAL_TEMPLATE; }
        @Override protected String getCommandLineFormat() {
            return this.useFormatter ? getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat();
        }
    }

    // Any other @Arguments
    private static class DefaultArgumentField extends ArgumentDefinitionField {
        // When true the value is rendered through a companion "...Format" field.
        private final boolean useFormatter;
        public DefaultArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) {
            super(argumentDefinition);
            this.useFormatter = useFormatter;
        }

        @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.argumentType); }
        @Override protected String getFieldType() { return getType(getInnerType()); }
        @Override protected String getDefaultValue() { return "_"; }
        @Override protected String getCommandLineFormat() {
            return this.useFormatter ? getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat();
        }
    }

    /**
     * The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file.
     */
    public static class RodBindArgumentField extends InputArgumentField {
        // Whether this rod binding must be supplied; set by the caller rather than the definition.
        private boolean isRequired;
        public RodBindArgumentField(ArgumentDefinition argumentDefinition, boolean isRequired) {
            super(argumentDefinition);
            this.isRequired = isRequired;
        }
        @Override protected boolean isRequired() { return this.isRequired; }
        @Override protected String getRawFieldType() { return "RodBind"; }
    }

    /**
     * Adds optional inputs for the indexes of any bams or sams added to this function.
     */
    private static class IndexFilesField extends ArgumentField {
        @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Input.class; }
        // Index files are dependency tracking only; they never appear on the command line.
        @Override public String getCommandLineAddition() { return ""; }
        @Override protected String getDoc() { return "Dependencies on any index files for any bams or sams added to input_files"; }
        @Override protected String getFullName() { return "index_files"; }
        @Override protected boolean isRequired() { return false; }
        @Override protected String getFieldType() { return "List[File]"; }
        @Override protected String getDefaultValue() { return "Nil"; }
        @Override protected Class<?> getInnerType() { return File.class; }
        @Override protected String getRawFieldName() { return "index_files"; }

        // Generated freeze code derives .bai/.sai index paths from the bam/sam inputs.
        @Override protected String getFreezeFields() {
            return String.format(
                    "index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" +
                    "index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n");
        }
    }

    /**
     * Companion field holding the user-overridable format string for another
     * generated field's value (e.g. controlling float precision).
     */
    private static class FormatterArgumentField extends ArgumentField {
        // The field whose value this format string renders.
        private final ArgumentField argumentField;
        public FormatterArgumentField(ArgumentField argumentField) {
            this.argumentField = argumentField;
        }
        @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Argument.class; }
        // The format field itself contributes nothing directly to the command line.
        @Override public String getCommandLineAddition() { return ""; }
        @Override protected String getDoc() { return "Format string for " + this.argumentField.getFullName(); }
        @Override protected String getFullName() { return this.argumentField.getFullName() + "Format"; }
        @Override protected boolean isRequired() { return false; }
        @Override protected String getFieldType() { return "String"; }
        @Override protected String getDefaultValue() { return "\"%s\""; }
        @Override protected Class<?> getInnerType() { return String.class; }
        @Override protected String getRawFieldName() { return this.argumentField.getRawFieldName() + "Format"; }
    }
}

View File

@ -0,0 +1,215 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.util.*;
/**
 * Base class for a single generated field in a Queue GATK extension.
 *
 * Each ArgumentField knows how to render itself as Scala source: the import
 * statements it needs, the annotated field declaration, and the expression
 * appended to the generated command line.
 */
public abstract class ArgumentField {

    /** @return the Scala import statements required by this field's types, skipping built-ins. */
    public Collection<String> getImportStatements() {
        List<String> imports = new ArrayList<String>();
        for (Class<?> importClass: getImportClasses()) {
            if (!isBuiltIn(importClass))
                // Inner classes use '$' in getName(); generated Scala needs '.' instead.
                imports.add("import " + importClass.getName().replace("$", "."));
        }
        return imports;
    }

    /**
     * Returns true if a class is built in and doesn't need to be imported.
     * @param argType The class to check.
     * @return true if the class is built in and doesn't need to be imported
     */
    private static boolean isBuiltIn(Class<?> argType) {
        return argType.isPrimitive() || argType == String.class || Number.class.isAssignableFrom(argType);
    }

    /** @return Scala code defining the argument and it's annotation. */
    public final String getArgumentAddition() {
        return String.format("%n" +
                "/** %s */%n" +
                "@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" +
                "%svar %s: %s = %s%n",
                getDoc(),
                getAnnotationIOClass().getSimpleName(),
                getFullName(),
                getShortName(),
                getDoc(),
                isRequired(),
                getExclusiveOf(),
                getValidation(),
                getScatterGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue());
    }

    /** @return Scala code to append to the command line. */
    public abstract String getCommandLineAddition();

    // Argument Annotation

    /** @return Documentation for the annotation. */
    protected abstract String getDoc();

    /** @return Annotation class of the annotation. */
    protected abstract Class<? extends Annotation> getAnnotationIOClass();

    /** @return Full name for the annotation. */
    protected abstract String getFullName();

    /** @return Short name for the annotation or "". */
    protected String getShortName() { return ""; }

    /** @return true if the argument is required. */
    protected abstract boolean isRequired();

    /** @return A comma separated list of arguments that may be substituted for this field. */
    protected String getExclusiveOf() { return ""; }

    /** @return A validation string for the argument. */
    protected String getValidation() { return ""; }

    /** @return A scatter or gather annotation with a line feed, or "". */
    protected String getScatterGatherAnnotation() { return ""; }

    // Scala

    /** @return The scala field type. */
    protected abstract String getFieldType();

    /** @return The scala default value. */
    protected abstract String getDefaultValue();

    /** @return The class of the field, or the component type if the scala field is a collection. */
    protected abstract Class<?> getInnerType();

    /** @return A custom command for overriding freeze. */
    protected String getFreezeFields() { return ""; }

    /** @return the classes whose imports this field may need: the inner type and its annotation. */
    @SuppressWarnings("unchecked")
    protected Collection<Class<?>> getImportClasses() {
        return Arrays.asList(this.getInnerType(), getAnnotationIOClass());
    }

    /** @return True if this field uses @Scatter. */
    public boolean isScatter() { return false; }

    /** @return True if this field uses @Gather. */
    public boolean isGather() { return false; }

    /** @return The raw field name, which will be checked against scala build in types. */
    protected abstract String getRawFieldName();

    /** @return The field name checked against reserved words. */
    protected final String getFieldName() {
        return getFieldName(this.getRawFieldName());
    }

    /**
     * Sanitizes a raw name into a legal Scala identifier: prefixes a leading
     * non-letter with '_' and backquotes reserved words or hyphenated names.
     * @param rawFieldName The raw field name
     * @return The field name checked against reserved words.
     */
    protected static String getFieldName(String rawFieldName) {
        String fieldName = rawFieldName;
        if (!StringUtils.isAlpha(fieldName.substring(0,1)))
            fieldName = "_" + fieldName;
        if (isReserved(fieldName) || fieldName.contains("-"))
            fieldName = "`" + fieldName + "`";
        return fieldName;
    }

    /** via http://www.scala-lang.org/sites/default/files/linuxsoft_archives/docu/files/ScalaReference.pdf */
    private static final List<String> reservedWords = Arrays.asList(
            "abstract", "case", "catch", "class", "def",
            "do", "else", "extends", "false", "final",
            "finally", "for", "forSome", "if", "implicit",
            "import", "lazy", "match", "new", "null",
            "object", "override", "package", "private", "protected",
            "return", "sealed", "super", "this", "throw",
            "trait", "try", "true", "type", "val",
            "var", "while", "with", "yield");

    /** @return true if the word is a Scala reserved word. */
    protected static boolean isReserved(String word) {
        return reservedWords.contains(word);
    }

    /**
     * On primitive types returns the capitalized scala type.
     * @param argType The class to check for options.
     * @return the simple name of the class.
     */
    protected static String getType(Class<?> argType) {
        String type = argType.getSimpleName();
        if (argType.isPrimitive())
            type = StringUtils.capitalize(type);
        // Scala's integer type is Int, not Integer.
        if ("Integer".equals(type))
            type = "Int";
        return type;
    }

    /** @return the string Java-escaped for embedding in generated source, or "" for null. */
    protected static String escape(String string) {
        return (string == null) ? "" : StringEscapeUtils.escapeJava(string);
    }

    /**
     * @param argType The class to check for options.
     * @return true if option should be used.
     */
    protected static boolean useOption(Class<?> argType) {
        return (argType.isPrimitive()) || (Number.class.isAssignableFrom(argType)) || (argType.isEnum());
    }

    /**
     * @param argType The class to check for options.
     * @return true if option should be used.
     */
    protected static boolean useFormatter(Class<?> argType) {
        return (argType.equals(Double.class) || argType.equals(Double.TYPE) ||
                argType.equals(Float.class) || argType.equals(Float.TYPE));
    }

    // TODO: Use an annotation, type descriptor, anything but hardcoding these lists!
    // Maps GATK stream/reader/writer argument types onto the File/String types
    // that make sense as fields of a generated Queue function.
    protected static Class<?> mapType(Class<?> clazz) {
        if (InputStream.class.isAssignableFrom(clazz)) return File.class;
        if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class;
        if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
        if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class;
        if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;
        if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class;
        return clazz;
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import java.util.Collection;
/**
* Finds all command line programs.
*/
public class CommandLineProgramManager extends PluginManager<CommandLineProgram> {
    /** Discovers CommandLineProgram plugins, stripping the "CLP" suffix from their names. */
    public CommandLineProgramManager() {
        super(CommandLineProgram.class, "CommandLineProgram", "CLP");
    }

    /** @return the discovered command-line program classes. */
    public Collection<Class<? extends CommandLineProgram>> getValues() {
        return this.pluginsByName.values();
    }
}

View File

@ -0,0 +1,232 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import net.sf.picard.filter.SamRecordFilter;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;
/**
 * Generates Queue modules (scala CommandLineFunction wrappers) that can be
 * used to run GATK walkers.
 *
 * For each GATK command line program, each walker, and each read filter, a
 * scala source file is written into the output directory.  ArgumentCollections
 * are flattened into a single module.
 */
public class GATKExtensionsGenerator extends CommandLineProgram {
    private static final Logger logger = Logger.getRootLogger();

    // All generated wrappers are emitted into this single extensions package.
    public static final String GATK_EXTENSIONS_PACKAGE_NAME = "org.broadinstitute.sting.queue.extensions.gatk";
    private static final String COMMANDLINE_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;
    private static final String FILTER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;
    private static final String WALKER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;

    @Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true)
    public File outputDirectory;

    // Managers used to enumerate the command line programs, walkers, read
    // filters, and reference metadata tracks that wrappers are generated for.
    CommandLineProgramManager clpManager = new CommandLineProgramManager();
    GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
    WalkerManager walkerManager = new WalkerManager();
    FilterManager filterManager = new FilterManager();
    RMDTrackManager rmdTrackManager = new RMDTrackManager();

    /**
     * Required main method implementation.
     * @param argv Command-line arguments.
     */
    public static void main(String[] argv) {
        try {
            start(new GATKExtensionsGenerator(), argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }

    /**
     * Registers the GATK-specific argument type descriptors so this generator
     * interprets walker arguments the same way the GATK engine does.
     * @return The list of GATK stub argument type descriptors.
     */
    @Override
    protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
        List<ArgumentTypeDescriptor> typeDescriptors = new ArrayList<ArgumentTypeDescriptor>();
        typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine));
        typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine));
        typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine));
        typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine));
        return typeDescriptors;
    }

    /**
     * Walks over the known command line programs, walkers and read filters,
     * writing one generated scala wrapper for each into outputDirectory.
     * @return 0 on success, 1 if generation failed with an IOException.
     */
    @Override
    protected int execute() {
        try {
            if (!outputDirectory.isDirectory() && !outputDirectory.mkdirs())
                throw new StingException("Unable to create output directory: " + outputDirectory);

            for (Class<? extends CommandLineProgram> clp: clpManager.getValues()) {
                // Only wrap programs from the GATK packages.
                if (!isGatkProgram(clp))
                    continue;

                String clpClassName = clpManager.getName(clp);
                writeClass("org.broadinstitute.sting.queue.function.JarCommandLineFunction", COMMANDLINE_PACKAGE_NAME, clpClassName,
                        "", ArgumentDefinitionField.getArgumentFields(clp));

                // The GATK itself gets an additional wrapper per walker, each
                // extending the generated CommandLineGATK wrapper above.
                if (clp == CommandLineGATK.class) {
                    for (Entry<String, Collection<Class<? extends Walker>>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) {
                        for(Class<? extends Walker> walkerType: walkersByPackage.getValue()) {
                            String walkerName = walkerManager.getName(walkerType);
                            List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();

                            // Walker arguments, its RMD bindings, and its read filter arguments.
                            argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(walkerType));
                            argumentFields.addAll(RodBindField.getRodArguments(walkerType, rmdTrackManager));
                            argumentFields.addAll(ReadFilterField.getFilterArguments(walkerType));

                            // The constructor body pins analysis_type to the walker name.
                            writeClass(COMMANDLINE_PACKAGE_NAME + "." + clpClassName, WALKER_PACKAGE_NAME,
                                    walkerName, String.format("analysis_type = \"%s\"%n%n", walkerName), argumentFields);
                        }
                    }
                }
            }

            // Each read filter becomes a mixin trait adding -read_filter.
            for (Class<? extends SamRecordFilter> filter: filterManager.getValues()) {
                String filterName = filterManager.getName(filter);
                writeFilter(FILTER_PACKAGE_NAME, filterName, ArgumentDefinitionField.getArgumentFields(filter));
            }
            return 0;
        } catch (IOException exception) {
            logger.error("Error generating queue output.", exception);
            return 1;
        }
    }

    // Package prefixes whose CommandLinePrograms get wrappers generated.
    private static final List<String> gatkPackages = Arrays.asList(
            "org.broadinstitute.sting.gatk",
            "org.broadinstitute.sting.analyzecovariates");

    /**
     * @param clazz Class to examine.
     * @return true if the class lives under one of the GATK packages.
     */
    private boolean isGatkProgram(Class<?> clazz) {
        if (clazz.getPackage() == null)
            return false;
        String classPackage = clazz.getPackage().getName();
        for (String gatkPackage : gatkPackages)
            if (classPackage.startsWith(gatkPackage))
                return true;
        return false;
    }

    /**
     * Generates a scala class wrapper and writes it to the output directory.
     */
    private void writeClass(String baseClass, String packageName, String className, String constructor,
                            List<? extends ArgumentField> argumentFields) throws IOException {
        String content = getContent(CLASS_TEMPLATE, baseClass, packageName, className, constructor, "", argumentFields);
        writeFile(packageName + "." + className, content);
    }

    /**
     * Generates a scala trait for a read filter, appending -read_filter to the command line.
     */
    private void writeFilter(String packageName, String className, List<? extends ArgumentField> argumentFields) throws IOException {
        String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
                packageName, className, "", String.format(" + \" -read_filter %s\"", className), argumentFields);
        writeFile(packageName + "." + className, content);
    }

    /**
     * Writes the content to <outputDirectory>/<fullClassName as path>.scala,
     * skipping the write when the existing file already matches (avoids
     * touching timestamps and triggering unnecessary recompiles).
     */
    private void writeFile(String fullClassName, String content) throws IOException {
        File outputFile = new File(outputDirectory, fullClassName.replace(".", "/") + ".scala");
        if (outputFile.exists()) {
            String existingContent = FileUtils.readFileToString(outputFile);
            if (StringUtils.equals(content, existingContent))
                return;
        }
        FileUtils.writeStringToFile(outputFile, content);
    }

    /**
     * Fills in a scala source template from the argument fields.
     * See CLASS_TEMPLATE and TRAIT_TEMPLATE for the %s/%n slots being filled.
     */
    private static String getContent(String scalaTemplate, String baseClass, String packageName, String className,
                                     String constructor, String commandLinePrefix, List<? extends ArgumentField> argumentFields) {
        StringBuilder arguments = new StringBuilder();
        StringBuilder commandLine = new StringBuilder(commandLinePrefix);
        Set<String> importSet = new HashSet<String>();
        boolean isScatter = false;
        boolean isGather = false;
        List<String> freezeFields = new ArrayList<String>();

        for(ArgumentField argumentField: argumentFields) {
            arguments.append(argumentField.getArgumentAddition());
            commandLine.append(argumentField.getCommandLineAddition());
            importSet.addAll(argumentField.getImportStatements());
            freezeFields.add(argumentField.getFreezeFields());
            // Any one scatterable/gatherable field marks the whole function.
            isScatter |= argumentField.isScatter();
            isGather |= argumentField.isGather();
        }

        if (isScatter) {
            importSet.add("import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction");
            importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Scatter");
            baseClass += " with ScatterGatherableFunction";
        }
        if (isGather)
            importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Gather");

        // Sort the imports so that they are always in the same order.
        List<String> sortedImports = new ArrayList<String>(importSet);
        Collections.sort(sortedImports);

        StringBuffer freezeFieldOverride = new StringBuffer();
        for (String freezeField: freezeFields)
            freezeFieldOverride.append(freezeField);
        if (freezeFieldOverride.length() > 0) {
            // Wrap any per-field freeze code in a freezeFieldValues override.
            freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n"));
            freezeFieldOverride.append(String.format("}%n%n"));
        }

        // see CLASS_TEMPLATE and TRAIT_TEMPLATE below
        return String.format(scalaTemplate, packageName, StringUtils.join(sortedImports, NEWLINE),
                className, baseClass, constructor, arguments, freezeFieldOverride, commandLine);
    }

    private static final String NEWLINE = String.format("%n");

    // %s slots: package, imports, class name, base class, constructor body,
    // argument fields, freezeFieldValues override, command line additions.
    private static final String CLASS_TEMPLATE = "package %s%n"+
            "%s%n" +
            "class %s extends %s {%n" +
            "%s%s%n" +
            "%soverride def commandLine = super.commandLine%s%n" +
            "}%n";

    private static final String TRAIT_TEMPLATE = "package %s%n"+
            "%s%n" +
            "trait %s extends %s {%n" +
            "%s%s%n" +
            "%sabstract override def commandLine = super.commandLine%s%n" +
            "}%n";
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.util.ArrayList;
import java.util.List;
public class ReadFilterField {
    /**
     * Builds the argument fields for every read filter declared on a walker.
     * @param walkerClass the class of the walker
     * @return the list of argument fields
     */
    public static List<ArgumentField> getFilterArguments(Class<? extends Walker> walkerClass) {
        List<ArgumentField> fields = new ArrayList<ArgumentField>();
        for (Class<? extends SamRecordFilter> filterType : WalkerManager.getReadFilterTypes(walkerClass)) {
            fields.addAll(ArgumentDefinitionField.getArgumentFields(filterType));
        }
        return fields;
    }
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.io.File;
import java.lang.annotation.Annotation;
import java.util.ArrayList;
import java.util.List;
/**
 * Allows user to specify the rod file but locks in the track name and the track type.
 */
public class RodBindField extends ArgumentField {
    public static final String ROD_BIND_FIELD = "rodBind";

    private final String trackName;                 // reference metadata track name (from @Requires/@Allows)
    private final String typeName;                  // concrete track record type, e.g. "VCF"
    private final List<RodBindField> relatedFields; // all fields generated for the same RMD entry
    private final boolean isRequired;               // true when from @Requires, false when from @Allows

    /**
     * @param trackName the reference metadata track name
     * @param typeName the track record type name
     * @param relatedFields the sibling fields for the same RMD entry (may still be being populated)
     * @param isRequired true if the walker requires this track
     */
    public RodBindField(String trackName, String typeName, List<RodBindField> relatedFields, boolean isRequired) {
        this.trackName = trackName;
        this.typeName = typeName;
        this.relatedFields = relatedFields;
        this.isRequired = isRequired;
    }

    @SuppressWarnings("unchecked")
    @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Input.class; }
    @Override protected Class<?> getInnerType() { return File.class; }
    @Override protected String getFullName() { return escape(getRawFieldName()); }
    @Override protected String getFieldType() { return "File"; }
    @Override protected String getDefaultValue() { return "_"; }
    @Override protected String getRawFieldName() { return this.trackName + this.typeName; }
    @Override protected String getDoc() { return escape(this.typeName + " " + this.trackName); }
    @Override protected boolean isRequired() { return this.isRequired; }

    /**
     * @return scala snippet appending the -B name,type,file triplet;
     *         wrapped in optional(...) when the field may be unset.
     */
    @Override public String getCommandLineAddition() {
        return String.format(this.useOption()
                ? " + optional(\" -B %s,%s,\", %s)"
                : " + \" -B %s,%s,\" + %s",
                this.trackName, this.typeName, getFieldName());
    }

    // The field may be unset when it is optional, or when any one of several
    // related fields can satisfy the requirement.
    private boolean useOption() {
        return !this.isRequired || (relatedFields.size() > 1);
    }

    /**
     * @return comma separated list of fields that may satisfy this binding instead.
     */
    @Override protected String getExclusiveOf() {
        StringBuilder exclusiveOf = new StringBuilder();
        // TODO: Stop allowing the generic "rodBind" triplets to satisfy the requirement after @Requires are fixed.
        if (this.isRequired)
            exclusiveOf.append(ROD_BIND_FIELD);
        for (RodBindField relatedField: relatedFields)
            if (relatedField != this) {
                if (exclusiveOf.length() > 0)
                    exclusiveOf.append(",");
                exclusiveOf.append(relatedField.getFieldName());
            }
        return exclusiveOf.toString();
    }

    /**
     * Generates the rod binding argument fields for a walker from its
     * @Requires and @Allows metadata.
     * @param walkerClass the class of the walker
     * @param rmdTrackManager manager used to resolve track record type names
     * @return the list of argument fields
     */
    public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackManager rmdTrackManager) {
        List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
        for (RMD required: WalkerManager.getRequiredMetaData(walkerClass))
            argumentFields.addAll(createTrackFields(required, rmdTrackManager, true));
        // BUGFIX: tracks declared via @Allows are optional.  Previously this
        // passed isRequired=true (copy-paste from the @Requires loop above),
        // which incorrectly made allowed tracks mandatory.
        for (RMD allowed: WalkerManager.getAllowsMetaData(walkerClass))
            argumentFields.addAll(createTrackFields(allowed, rmdTrackManager, false));
        return argumentFields;
    }

    // Builds one RodBindField per matching track record type for a single RMD
    // entry.  The list is passed into each field while still being populated so
    // that every field ends up sharing the complete set of related fields.
    private static List<RodBindField> createTrackFields(RMD rmd, RMDTrackManager rmdTrackManager, boolean isRequired) {
        List<RodBindField> fields = new ArrayList<RodBindField>();
        String trackName = rmd.name();
        if ("*".equals(trackName)) {
            // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
            //fields.add(new RodBindArgumentField(argumentDefinition, isRequired));
        } else {
            for (String typeName: rmdTrackManager.getTrackRecordTypeNames(rmd.type()))
                fields.add(new RodBindField(trackName, typeName, fields, isRequired));
        }
        return fields;
    }
}

View File

@ -0,0 +1,63 @@
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
class UnifiedGenotyperExample extends QScript {
  qscript =>

  @Input(doc="gatk jar file")
  var gatkJar: File = _

  @Input(doc="bam files", shortName="I")
  var bamFiles: List[File] = Nil

  @Input(doc="interval list", shortName="L")
  var intervals: File = _

  @Input(doc="referenceFile", shortName="R")
  var referenceFile: File = _

  @Argument(doc="filter names", shortName="filter")
  var filterNames: List[String] = Nil

  @Argument(doc="filter expressions", shortName="filterExpression")
  var filterExpressions: List[String] = Nil

  @Argument(doc="job queue", shortName="queue", required=false)
  var jobQueue = "broad"

  // Settings shared by every GATK invocation in this pipeline.
  trait UnifiedGenotyperArguments extends CommandLineGATK {
    this.jobQueue = qscript.jobQueue
    this.jarFile = qscript.gatkJar
    this.intervals = qscript.intervals
    this.reference_sequence = qscript.referenceFile
  }

  // One call/filter/eval chain per input bam.
  def script = {
    for (bamFile <- bamFiles) {
      val genotyper = new UnifiedGenotyper with UnifiedGenotyperArguments
      val filtration = new VariantFiltration with UnifiedGenotyperArguments
      val evaluation = new VariantEval with UnifiedGenotyperArguments
      val printReads = new PrintReads with UnifiedGenotyperArguments

      printReads.input_file :+= bamFile
      printReads.outputBamFile = swapExt(bamFile, "bam", "new.bam")
      printReads.scatterCount = 2
      printReads.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = new File("/path/to/jar") }
      add(printReads)

      // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh.
      genotyper.scatterCount = 3
      genotyper.input_file :+= bamFile
      genotyper.out = swapExt(bamFile, "bam", "unfiltered.vcf")

      filtration.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
      filtration.out = swapExt(bamFile, "bam", "filtered.vcf")

      evaluation.rodBind :+= RodBind("vcf", "VCF", filtration.out)
      evaluation.out = swapExt(bamFile, "bam", "eval")

      //add(genotyper, filtration, evaluation)
    }
  }
}

View File

@ -1,8 +1,16 @@
import org.broadinstitute.sting.queue.QScript._
// Other imports can be added here
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.{GenotypeMergeType, VariantMergeType}
import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory.VE2TemplateType
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
val UNIVERSAL_GATK_ARGS = " -l INFO " // -L 1
val unusedArgs = setArgs(args)
class Onekg_table1 extends QScript {
@Argument(doc="stage")
var stage: String = _
@Argument(doc="gatkJarFile")
var gatkJarFile: File = _
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile } // -L 1
class Target(project: String, snpVCF: String, indelVCF: String, calledGenome: Double, targetGenome: Double, pop: String, pilot : String, bam: String = null) {
def reportFile: String = List(pop, pilot, "report").mkString(".")
@ -40,9 +48,9 @@ for ( (pop: String, called) <- p2Targets )
targets ::= new Target("SRP000032", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
// pilot 3
for (POP <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
val indels = if ( POP != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+POP+".exon.2010_06.genotypes.vcf.gz" else null
targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + POP + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, POP, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(POP))
for (pop <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
val indels = if ( pop != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
}
// merged files
@ -57,7 +65,7 @@ val INTERVALS = Map(
"pilot3" -> "/humgen/gsa-hpprojects/1kg/1kg_pilot3/documents/CenterSpecificTargetLists/results/p3overlap.targets.b36.interval_list"
)
def setupStage(stage: String) = stage match {
def script = stage match {
case "ALL" =>
// initial pilot1 merge -- autosomes + x
for ( (pop: String,called) <- p1Targets ) {
@ -106,36 +114,36 @@ def setupStage(stage: String) = stage match {
case _ => throw new Exception("Unknown stage" + stage)
}
setupStage(unusedArgs(0))
// Populate parameters passed in via -P
setParams
// Run the pipeline
run
// Using scala anonymous classes
class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends GatkFunction {
@Input(doc="foo") var vcfFile: File = new File(vcfIn)
@Output(doc="foo") var evalFile: File = new File(evalOut)
class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends org.broadinstitute.sting.queue.extensions.gatk.VariantEval with UNIVERSAL_GATK_ARGS {
val vcfFile = new File(vcfIn)
this.rodBind :+= RodBind("eval", vcfType, vcfFile)
this.out = new File(evalOut)
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod")
this.reportType = Some(VE2TemplateType.Grep)
this.evalModule :+= "CompOverlap"
override def dotString = "VariantEval: " + vcfFile.getName
def commandLine = gatkCommandLine("VariantEval") + UNIVERSAL_GATK_ARGS + "-D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -reportType Grep -B eval,%s,%s -o %s -E CompOverlap".format(vcfType, vcfFile, evalFile)
}
class StatPop(target: Target) extends CommandLineFunction {
@Input(doc="foo") var snpVCF = new File(target.getSNPVCF)
@Input(doc="foo") var snpEval = new File(target.getSNPEval)
@Input(doc="foo") var indelVCF = if (target.hasIndelVCF) new File(target.getIndelVCF) else {}
@Input(doc="foo", required=false) var indelVCF: File = if (target.hasIndelVCF) new File(target.getIndelVCF) else { null }
@Output(doc="foo") var reportFile: File = new File(target.reportFile)
override def dotString = "1kgStats: " + reportFile
def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/1kgStatsForCalls.py -v -a pilot_data.alignment.index -s pilot_data.sequence.index -r /broad/1KG/DCC/ftp/ -o " + target.reportFile + " " + target.extraArgs + (if (target.hasDOC) " -c " + target.getDOCSummaryFile else "") + " --snpsEval " + target.getSNPEval + (if (target.hasIndelVCF) " --indels " + target.getIndelVCF else "")
}
class Combine(vcfsInArg: List[String], vcfOutPath: String) extends GatkFunction {
@Input(doc="foo") var vcfs = vcfsInArg.map((x: String) => new File(x))
@Output(doc="foo") var vcfFile: File = new File(vcfOutPath)
class Combine(vcfsInArg: List[String], vcfOutPath: String) extends org.broadinstitute.sting.queue.extensions.gatk.CombineVariants with UNIVERSAL_GATK_ARGS {
val vcfs = vcfsInArg.map((x: String) => new File(x))
val vcfFile = new File(vcfOutPath)
this.variantmergeoption = Some(VariantMergeType.UNION)
this.genotypemergeoption = Some(GenotypeMergeType.PRIORITIZE)
this.out = vcfFile
this.rodBind ++= vcfs.map( input => RodBind(input.getName,"VCF",input) )
this.rod_priority_list = vcfs.map( _.getName ).mkString(",")
override def dotString = "CombineVariants: " + vcfs.map(_.getName).mkString(",") + " => " + vcfFile.getName
def commandLine = gatkCommandLine("CombineVariants") + UNIVERSAL_GATK_ARGS + "-variantMergeOptions UNION -genotypeMergeOptions PRIORITIZE -o %s %s -priority %s".format(vcfFile, vcfs.map( input => " -B %s,VCF,%s".format(input.getName,input)).mkString(""), vcfs.map( _.getName ).mkString(","))
}
class MaskStats(pop: String) extends CommandLineFunction {
@ -143,9 +151,19 @@ class MaskStats(pop: String) extends CommandLineFunction {
def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/maskStats.py masks/" + pop + ".mask.fa.gz -x MT -x Y -o " + outFile
}
class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends GatkFunction {
@Input(doc="foo") var bamFile: File = new File(bam)
@Output(doc="foo") var docFile: File = new File(docOutPath)
class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends org.broadinstitute.sting.queue.extensions.gatk.DepthOfCoverage with UNIVERSAL_GATK_ARGS {
val bamFile = new File(bam)
this.omitIntervalStatistics = true
this.omitDepthOutputAtEachBase = true
this.minBaseQuality = Some(0)
this.minMappingQuality = Some(0)
this.out = new File(docOutPath)
this.input_file :+= bamFile
if (interval != null) {
this.intervalsString :+= interval
this.excludeIntervalsString ++= List("MT", "Y")
}
override def dotString = "DOC: " + bamFile.getName
def commandLine = gatkCommandLine("DepthOfCoverage") + UNIVERSAL_GATK_ARGS + "-omitIntervals -omitBaseOutput -mbq 0 -mmq 0 -o %s -I %s".format(docFile, bamFile) + (if (interval != null) " -XL MT -XL Y -L " + interval else "")
}
}

View File

@ -1,240 +1,61 @@
import org.broadinstitute.sting.queue.function.scattergather.{ContigScatterFunction, FixMatesGatherFunction}
import org.broadinstitute.sting.gatk.DownsampleType
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.QScript._
// Other imports can be added here
val unparsedArgs = setArgs(args)
class fullCallingPipeline extends QScript {
qscript =>
// very slow-to-run fast-to-write parse args function. Only worth changing if using lots of flags with lots of lookups.
@Argument(doc="contigIntervals", shortName="contigIntervals")
var contigIntervals: File = _
def parseArgs(flag: String): String = {
var retNext: Boolean = false
for ( f <- unparsedArgs ) {
if ( retNext ) {
return f
} else {
if ( f.equals(flag) ) {
retNext = true
}
}
}
return "None"
}
@Argument(doc="numContigs", shortName="numContigs")
var numContigs: Int = _
/////////////////////////////////////////////////
// step one: we need to create a set of realigner targets, one for each bam file
/////////////////////////////////////////////////
// todo -- make me less of a hack that makes Khalid cry
abstract class GatkFunctionLocal extends GatkFunction {
if ( QScript.inputs("interval_list").size > 0 ) {
this.intervals = QScript.inputs("interval_list").head
} else {
this.intervals = QScript.inputs("interval.list").head
}
}
@Argument(doc="project", shortName="project")
var project: String = _
class RealignerTargetCreator extends GatkFunctionLocal {
@Gather(classOf[SimpleTextGatherFunction])
@Output(doc="Realigner targets")
var realignerIntervals: File = _
@Input(doc="trigger", shortName="trigger", required=false)
var trigger: File = _
def commandLine = gatkCommandLine("RealignerTargetCreator") + "-o %s".format(realignerIntervals)
}
@Input(doc="refseqTable", shortName="refseqTable")
var refseqTable: File = _
/////////////////////////////////////////////////
// step two: we need to clean each bam file - gather will fix mates
/////////////////////////////////////////////////
@Input(doc="dbsnpTable", shortName="dbsnpTable")
var dbsnpTable: File = _
class IndelRealigner extends GatkFunction {
@Input(doc="Intervals to clean")
var intervalsToClean: File = _
@Scatter(classOf[ContigScatterFunction])
@Input(doc="Contig intervals")
var contigIntervals: File = _
@Gather(classOf[FixMatesGatherFunction])
@Output(doc="Cleaned bam file")
var cleanedBam: File = _
@Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
var picardFixMatesJar: File = _
this.javaTmpDir = parseArgs("-tmpdir") // todo -- hack, move into script or something
@Input(doc="intervals")
var intervals: File = _
override def freeze = {
this.intervals = contigIntervals
this.jobQueue = "long"
super.freeze
}
@Input(doc="bam files", shortName="I")
var bamFiles: List[File] = Nil
def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals)
}
@Input(doc="gatk jar")
var gatkJar: File = _
/////////////////////////////////////////////////
// step three: we need to call (multisample) over all bam files
/////////////////////////////////////////////////
class UnifiedGenotyper extends GatkFunctionLocal {
@Input(doc="An optional trigger track (trigger emit will be set to 0)",required=false)
var trigger: File = _
@Input(doc="A list of comparison files for annotation",required=false)
var compTracks: List[(String,File)] = Nil
@Input(doc="Calling confidence level (may change depending on depth and number of samples)")
var callConf: Int = _
@Gather(classOf[SimpleTextGatherFunction])
@Output(doc="raw vcf")
var rawVCF: File = _
// todo -- add input for comps, triggers, etc
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-G Standard -A MyHaplotypeScore -varout %s".format(rawVCF) +
" -stand_emit_conf 10 -mmq 20 -mbq 20 -dt EXPERIMENTAL_BY_SAMPLE -dcov 200" +
" -stand_call_conf %d".format(callConf) +
( if (trigger == null ) "" else " -trig_call_conf %d -trig_emit_conf 0 -B trigger,VCF,%s".format(callConf,trigger) ) +
makeCompString
def makeCompString = {
var S: String = ""
for ( tup <- compTracks ) {
S += " -B comp%s,VCF,%s".format(tup._1,tup._2)
}
S
}
}
/////////////////////////////////////////////////
// step four: we need to call indels (multisample) over all bam files
/////////////////////////////////////////////////
class UnifiedGenotyperIndels extends GatkFunctionLocal {
@Gather(classOf[SimpleTextGatherFunction])
@Output(doc="indel vcf")
var indelVCF: File = _
// todo -- add inputs for the indel genotyper
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS".format(indelVCF)
}
/////////////////////////////////////////////////
// step five: we need to filter variants on cluster and with indel mask
/////////////////////////////////////////////////
class VariantFiltration extends GatkFunctionLocal {
@Input(doc="A VCF file to filter")
var unfilteredVCF: File = _
@Input(doc="An interval mask to use to filter indels")
var indelMask: File = _
@Input(doc="Filter names",required=false)
var filterNames: List[String] = Nil
@Input(doc="Filter expressions",required=false)
var filterExpressions: List[String] = Nil
@Output(doc="The input VCF file, but filtered")
var filteredVCF: File = _
// to do -- snp cluster args?
def commandLine = gatkCommandLine("VariantFiltration") + "-B variant,VCF,%s -B mask,VCF,%s --maskName NearIndel --clusterWindowSize 20 --clusterSize 7 -o %s".format(unfilteredVCF,indelMask,filteredVCF) +
"%s%s".format(repeat(" -filterName ",filterNames), repeat(" -filterExpression ",filterExpressions))
}
/////////////////////////////////////////////////
// step six: we need to generate gaussian clusters with the optimizer
/////////////////////////////////////////////////
class GenerateVariantClusters extends GatkFunctionLocal {
@Input(doc="A VCF that has been filtered for clusters and indels")
var initialFilteredVCF: File = _
@Output(doc="Variant cluster file generated from input VCF")
var clusterFile: File = _
// todo -- args for annotations?
// todo -- args for resources (properties file)
override def freeze = {
// todo -- hacky change in memory limit -- fix this when more official roads to do this are in place
this.memoryLimit = Some(8)
this.jobQueue = "hugemem"
super.freeze
}
def commandLine = gatkCommandLine("GenerateVariantClusters") + "-an QD -an SB -an MyHaplotypeScore -an HRun " +
"-resources /humgen/gsa-scr1/chartl/sting/R -B input,VCF,%s -clusterFile %s".format(initialFilteredVCF,clusterFile)
}
/////////////////////////////////////////////////
// step seven: we need to apply gaussian clusters to our variants
/////////////////////////////////////////////////
class ApplyGaussianClusters extends GatkFunctionLocal {
@Input(doc="A VCF file to which to apply clusters")
var inputVCF: File = _
@Input(doc="A variant cluster file")
var clusterFile: File = _
@Output(doc="A quality-score recalibrated VCF file")
var recalibratedVCF: File = _
// todo -- inputs for Ti/Tv expectation and other things
def commandLine = gatkCommandLine("VariantRecalibrator") + "--target_titv 2.1 -resources /humgen/gsa-scr1/chartl/sting/R " +
"-B input,VCF,%s -clusterFile %s -output %s".format(inputVCF,clusterFile,recalibratedVCF)
}
/////////////////////////////////////////////////
// step eight: we need to make tranches out of the recalibrated qualities
/////////////////////////////////////////////////
class ApplyVariantCuts extends GatkFunctionLocal {
@Input(doc="A VCF file that has been recalibrated")
var recalibratedVCF: File = _
@Output(doc="A VCF file that has had tranches marked")
var tranchedVCF: File = _
@Output(doc="A tranch dat file")
var tranchFile: File = _
// todo -- fdr inputs, etc
def commandLine = gatkCommandLine("ApplyVariantCuts") +
"-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0".format(recalibratedVCF,tranchedVCF,tranchFile)
}
/////////////////////////////////////////////////
// step nine: we need to annotate variants using the annotator [or maf, for now]
/////////////////////////////////////////////////
class GenomicAnnotator extends GatkFunctionLocal {
@Input(doc="A VCF file to be annotated")
var inputVCF: File = _
@Input(doc="Refseq input table to use with the annotator")
var refseqTable: File = _
@Input(doc="Dbsnp input table to use with the annotator")
var dbsnpTable: File = _
@Gather(classOf[SimpleTextGatherFunction])
@Output(doc="A genomically annotated VCF file")
var annotatedVCF: File = _
def commandLine = gatkCommandLine("GenomicAnnotator") + " -B variant,VCF,%s -B refseq,AnnotatorInputTable,%s -B dbsnp,AnnotatorInputTable,%s -vcf %s -s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet -BTI variant".format(inputVCF,refseqTable,dbsnpTable,annotatedVCF)
}
/////////////////////////////////////////////////
// step ten: we need to evaluate variants with variant eval
/////////////////////////////////////////////////
class VariantEval extends GatkFunctionLocal {
@Input(doc="An optimized vcf file to evaluate")
var optimizedVCF: File = _
@Input(doc="A hand-fitlered vcf file to evaluate")
var handFilteredVCF: File = _
@Output(doc="An evaluation file")
var evalOutput: File = _
// todo -- make comp tracks command-line arguments or properties
def commandLine = gatkCommandLine("VariantEval") + "-B evalOptimized,VCF,%s -B evalHandFiltered,VCF,%s -E CountFunctionalClasses -E CompOverlap -E CountVariants -E TiTvVariantEvaluator -o %s".format(optimizedVCF,handFilteredVCF,evalOutput)
trait CommandLineGATKArgs extends CommandLineGATK {
this.intervals = qscript.intervals
this.jarFile = qscript.gatkJar
}
// ------------ SETUP THE PIPELINE ----------- //
// todo -- the unclean and clean pipelines are the same, so the code can be condensed significantly
def script = {
val projectBase: String = qscript.project
val cleanedBase: String = projectBase + ".cleaned"
val uncleanedBase: String = projectBase + ".uncleaned"
// there are commands that use all the bam files
var cleanBamFiles = List.empty[File]
val cleanSNPCalls = new UnifiedGenotyper
val uncleanSNPCalls = new UnifiedGenotyper
val cleanIndelCalls = new UnifiedGenotyperIndels
val uncleanIndelCalls = new UnifiedGenotyperIndels
for ( bam <- inputs("bam") ) {
for ( bam <- bamFiles ) {
// put unclean bams in unclean genotypers
uncleanSNPCalls.bamFiles :+= bam
uncleanIndelCalls.bamFiles :+= bam
// in advance, create the extension files
val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list")
@ -242,86 +63,129 @@ for ( bam <- inputs("bam") ) {
// create the cleaning commands
val targetCreator = new RealignerTargetCreator
targetCreator.bamFiles :+= bam
targetCreator.realignerIntervals = indel_targets
val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs
targetCreator.input_file :+= bam
targetCreator.out = indel_targets
val realigner = new IndelRealigner
realigner.bamFiles = targetCreator.bamFiles
realigner.contigIntervals = new File(parseArgs("-contigIntervals"))
realigner.intervalsToClean = targetCreator.realignerIntervals
realigner.scatterCount = parseArgs("-numContigs").toInt
realigner.cleanedBam = cleaned_bam
val realigner = new IndelRealigner with CommandLineGATKArgs
realigner.input_file = targetCreator.input_file
realigner.intervals = qscript.contigIntervals
//realigner.targetIntervals = targetCreator.out
realigner.targetIntervals = targetCreator.out.getAbsolutePath
realigner.scatterCount = qscript.numContigs
realigner.out = cleaned_bam
realigner.scatterClass = classOf[ContigScatterFunction]
realigner.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = qscript.picardFixMatesJar }
realigner.jobQueue = "long"
// put clean bams in clean genotypers
cleanSNPCalls.bamFiles :+= realigner.cleanedBam
cleanIndelCalls.bamFiles :+= realigner.cleanedBam
cleanBamFiles :+= realigner.out
add(targetCreator,realigner)
}
endToEnd(uncleanedBase,bamFiles)
endToEnd(cleanedBase,cleanBamFiles)
}
val projectBase: String = parseArgs("-project")
val cleanedBase: String = projectBase + ".cleaned"
val uncleanedBase: String = projectBase + ".uncleaned"
def endToEnd(base: String, snps: UnifiedGenotyper, indels: UnifiedGenotyperIndels) = {
def endToEnd(base: String, bamFiles: List[File]) = {
// step through the un-indel-cleaned graph:
// 1a. call snps and indels
snps.rawVCF = new File(base+".vcf")
snps.callConf = 30
snps.trigger = new File(parseArgs("-trigger"))
val snps = new UnifiedGenotyper with CommandLineGATKArgs
snps.input_file = bamFiles
snps.group :+= "Standard"
snps.annotation :+= "MyHamplotypeScore"
snps.variants_out = new File(base+".vcf")
snps.standard_min_confidence_threshold_for_emitting = Some(10)
snps.min_mapping_quality_score = Some(20)
snps.min_base_quality_score = Some(20)
snps.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
snps.downsample_to_coverage = Some(200)
// todo -- add input for comps, triggers, etc
if (qscript.trigger != null) {
snps.trigger_min_confidence_threshold_for_calling = Some(30)
snps.rodBind :+= RodBind("trigger", "VCF", qscript.trigger)
}
// todo -- hack -- get this from the command line, or properties
snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") )
snps.compTracks :+= ( "comp1KG_ALL",new File(parseArgs("-trigger") ) )
snps.rodBind :+= RodBind( "comp1KG_CEU", "VCF", new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") )
// TODO: what is the 1KG_ALL track?
//snps.rodBind :+= RodBind( "comp1KG_ALL", "VCF", qscript.trigger )
snps.scatterCount = 100
indels.indelVCF = new File(base+".indels.vcf")
val indels = new UnifiedGenotyper with CommandLineGATKArgs
indels.input_file = bamFiles
indels.variants_out = new File(base+".indels.vcf")
indels.genotype_model = Some(Model.INDELS)
indels.scatterCount = 100
// todo -- add inputs for the indel genotyper
// 1b. genomically annotate SNPs -- slow, but scatter it
val annotated = new GenomicAnnotator
annotated.inputVCF = snps.rawVCF
annotated.refseqTable = new File(parseArgs("-refseqTable"))
annotated.dbsnpTable = new File(parseArgs("-dbsnpTable"))
annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf")
val annotated = new GenomicAnnotator with CommandLineGATKArgs
annotated.rodBind :+= RodBind("variant", "VCF", snps.variants_out)
annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
annotated.rodBind :+= RodBind("dbsnp", "AnnotatorInputTable", qscript.dbsnpTable)
annotated.vcfOutput = swapExt(snps.variants_out,".vcf",".annotated.vcf")
annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
annotated.rodToIntervalTrackName = "variant"
annotated.scatterCount = 100
// 2.a filter on cluster and near indels
val masker = new VariantFiltration
masker.unfilteredVCF = annotated.annotatedVCF
masker.indelMask = indels.indelVCF
masker.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".indel.masked.vcf")
val masker = new VariantFiltration with CommandLineGATKArgs
masker.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput)
masker.rodBind :+= RodBind("mask", "VCF", indels.variants_out)
masker.maskName = "NearIndel"
masker.clusterWindowSize = Some(20)
masker.clusterSize = Some(7)
masker.out = swapExt(annotated.vcfOutput,".vcf",".indel.masked.vcf")
// todo -- snp cluster args?
// 2.b hand filter with standard filter
val handFilter = new VariantFiltration
handFilter.unfilteredVCF = annotated.annotatedVCF
handFilter.indelMask = indels.indelVCF
handFilter.filterNames = List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun")
handFilter.filterExpressions = List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"")
handFilter.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".handfiltered.vcf")
val handFilter = new VariantFiltration with CommandLineGATKArgs
handFilter.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput)
handFilter.rodBind :+= RodBind("mask", "VCF", indels.variants_out)
handFilter.filterName ++= List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun")
handFilter.filterExpression ++= List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"")
handFilter.out = swapExt(annotated.vcfOutput,".vcf",".handfiltered.vcf")
// 3.i generate gaussian clusters on the masked vcf
val clusters = new GenerateVariantClusters
clusters.initialFilteredVCF = masker.filteredVCF
clusters.clusterFile = swapExt(snps.rawVCF,".vcf",".cluster")
val clusters = new GenerateVariantClusters with CommandLineGATKArgs
clusters.rodBind :+= RodBind("input", "VCF", masker.out)
//clusters.clusterFile = swapExt(snps.variants_out,".vcf",".cluster")
val clusters_clusterFile = swapExt(snps.variants_out,".vcf",".cluster")
clusters.clusterFile = clusters_clusterFile.getAbsolutePath
clusters.memoryLimit = Some(8)
clusters.jobQueue = "hugemem"
// todo -- args for annotations?
// todo -- args for resources (properties file)
clusters.use_annotation ++= List("QD", "SB", "MyHaplotypeScore", "HRun")
clusters.path_to_resources = "/humgen/gsa-scr1/chartl/sting/R"
// 3.ii apply gaussian clusters to the masked vcf
val recalibrate = new ApplyGaussianClusters
val recalibrate = new VariantRecalibrator with CommandLineGATKArgs
recalibrate.clusterFile = clusters.clusterFile
recalibrate.inputVCF = masker.filteredVCF
recalibrate.recalibratedVCF = swapExt(masker.filteredVCF,".vcf",".optimized.vcf")
recalibrate.rodBind :+= RodBind("input", "VCF", masker.out)
recalibrate.out = swapExt(masker.out,".vcf",".optimized.vcf")
// todo -- inputs for Ti/Tv expectation and other things
recalibrate.target_titv = Some(2.1)
// 3.iii apply variant cuts to the clusters
val cut = new ApplyVariantCuts
cut.recalibratedVCF = recalibrate.recalibratedVCF
cut.tranchedVCF = swapExt(recalibrate.recalibratedVCF,".vcf",".tranched.vcf")
cut.tranchFile = swapExt(recalibrate.recalibratedVCF,".vcf",".tranch")
val cut = new ApplyVariantCuts with CommandLineGATKArgs
cut.rodBind :+= RodBind("input", "VCF", recalibrate.out)
//cut.outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf")
//cut.tranchesFile = swapExt(recalibrate.out,".vcf",".tranch")
val cut_outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf")
val cut_tranchesFile = swapExt(recalibrate.out,".vcf",".tranch")
cut.outputVCFFile = cut_outputVCFFile.getAbsolutePath
cut.tranchesFile = cut_tranchesFile.getAbsolutePath
// todo -- fdr inputs, etc
cut.fdr_filter_level = Some(10)
// 4. Variant eval the cut and the hand-filtered vcf files
val eval = new VariantEval
eval.optimizedVCF = cut.tranchedVCF
eval.handFilteredVCF = handFilter.filteredVCF
eval.evalOutput = new File(base+".eval")
val eval = new VariantEval with CommandLineGATKArgs
eval.rodBind :+= RodBind("evalOptimized", "VCF", cut_outputVCFFile)
eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out)
// todo -- make comp tracks command-line arguments or properties
eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator")
eval.out = new File(base+".eval")
add(snps,indels,annotated,masker,handFilter,clusters,recalibrate,cut,eval)
}
endToEnd(uncleanedBase,uncleanSNPCalls,uncleanIndelCalls)
endToEnd(cleanedBase,cleanSNPCalls,cleanIndelCalls)
setParams
run
}

View File

@ -1,73 +1,77 @@
import java.io.File
import org.broadinstitute.sting.queue.QScript._
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
import org.apache.commons.io.FilenameUtils;
// Other imports can be added here
val unusedArgs = setArgs(args)
class recalibrate extends QScript {
@Input(doc="bamIn", shortName="I")
var bamIns: List[File] = Nil
@Argument(doc="scatter")
var scatter = false
def runPipeline(arg: String) = {
val scatter = arg == "scatter"
@Argument(doc="gatk jar file")
var gatkJarFile: File = _
for (bamIn <- inputs(".bam")) {
def script = {
for (bamIn <- bamIns) {
val root = bamIn.getPath()
val bamRoot = FilenameUtils.removeExtension(root);
val recalData = new File(bamRoot + ".recal_data.csv")
val recalBam = new File(bamRoot + ".recal.bam")
val recalRecalData = new File(bamRoot + ".recal.recal_data.csv")
//add(new CountCovariates(root, recalData, "-OQ"))
val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam, "-OQ")
val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true }
if ( scatter ) {
tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list")
tableRecal.scatterCount = 25
}
add(tableRecal)
add(new Index(recalBam))
add(new CountCovariates(recalBam, recalRecalData, "-nt 4"))
add(new CountCovariates(recalBam, recalRecalData) { num_threads = Some(4) })
add(new AnalyzeCovariates(recalData, new File(recalData.getPath() + ".analyzeCovariates")))
add(new AnalyzeCovariates(recalRecalData, new File(recalRecalData.getPath() + ".analyzeCovariates")))
}
}
runPipeline(unusedArgs(0))
// Populate parameters passed in via -P
setParams
// Run the pipeline
run
def bai(bam: File) = new File(bam + ".bai")
class Index(bamIn: File) extends GatkFunction {
@Input(doc="foo") var bam = bamIn
@Output(doc="foo") var bamIndex = bai(bamIn)
memoryLimit = Some(1)
override def dotString = "Index: %s".format(bamIn.getName)
def commandLine = "samtools index %s".format(bam)
class Index(bamIn: File) extends BamIndexFunction {
bamFile = bamIn
}
class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction {
@Input(doc="foo") var bam = bamIn
@Input(doc="foo") var bamIndex = bai(bamIn)
@Output(doc="foo") var recalData = recalDataIn
memoryLimit = Some(4)
override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, args)
def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData)
class CountCovariates(bamIn: File, recalDataIn: File) extends org.broadinstitute.sting.queue.extensions.gatk.CountCovariates {
this.jarFile = gatkJarFile
this.input_file :+= bamIn
this.recal_file = recalDataIn
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod")
this.logging_level = "INFO"
this.max_reads_at_locus = Some(20000)
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.memoryLimit = Some(4)
override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, if (this.num_threads.isDefined) "-nt " + this.num_threads else "")
}
class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args: String = "") extends GatkFunction {
@Input(doc="foo") var bamIn = bamInArg
@Input(doc="foo") var recalData = recalDataIn
@Gather(classOf[BamGatherFunction])
@Output(doc="foo") var bamOut = bamOutArg
override def dotString = "TableRecalibrate: %s => %s [args %s]".format(bamInArg.getName, bamOutArg.getName, args)
memoryLimit = Some(2)
def commandLine = gatkCommandLine("TableRecalibration") + args + " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut) // bamOut.getPath())
class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) extends org.broadinstitute.sting.queue.extensions.gatk.TableRecalibration {
this.jarFile = gatkJarFile
this.input_file :+= bamInArg
this.recal_file = recalDataIn
this.output_bam = bamOutArg
this.logging_level = "INFO"
this.memoryLimit = Some(2)
override def dotString = "TableRecalibrate: %s => %s".format(bamInArg.getName, bamOutArg.getName, if (this.useOriginalQualities) " -OQ" else "")
}
class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction {
@Input(doc="foo") var recalData = recalDataIn
memoryLimit = Some(4)
class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends org.broadinstitute.sting.queue.extensions.gatk.AnalyzeCovariates {
this.jarFile = new File("/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar")
this.recal_file = recalDataIn
this.output_dir = outputDir.toString
this.path_to_resources = "/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/"
this.ignoreQ = Some(5)
this.path_to_Rscript = "/broad/tools/apps/R-2.6.0/bin/Rscript"
this.memoryLimit = Some(4)
override def dotString = "AnalyzeCovariates: %s".format(recalDataIn.getName)
def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript".format(recalData, outputDir)
}
}

View File

@ -1,7 +1,11 @@
import org.broadinstitute.sting.queue.QScript._
// Other imports can be added here
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
setArgs(args)
class variantRecalibrator extends QScript {
@Argument(doc="gatkJarFile")
var gatkJarFile: File = _
def script = {
val gList = List(30)
val sList = List(0.0001, 0.01)
@ -13,66 +17,40 @@ for (g: Int <- gList) {
for (d: Double <- dList) {
for(b: Double <- bList) {
// Using classes defined below
// Using classes defined by QueueGATKExtensions.jar
val gvc = new GenerateVariantClusters
val vr = new VariantRecalibrator
gvc.maxGaussians = g
gvc.shrinkage = s
gvc.dirichlet = d
gvc.clusterFile = new File("g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b))
gvc.jobOutputFile = swapExt(gvc.clusterFile, ".cluster", ".gvc.out")
gvc.jarFile = gatkJarFile
gvc.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf"))
gvc.logging_level = "INFO"
gvc.intervalsString :+= "20"
gvc.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
gvc.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
gvc.maxGaussians = Some(g)
gvc.shrinkage = Some(s)
gvc.shrinkageFormat = "%.6f"
gvc.dirichlet = Some(d)
gvc.dirichletFormat = "%.6f"
gvc.clusterFile = "g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b)
gvc.jobOutputFile = new File(gvc.clusterFile.stripSuffix(".cluster") + ".gvc.out")
vr.jarFile = gatkJarFile
vr.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf"))
vr.logging_level = "INFO"
vr.intervalsString :+= "20"
vr.target_titv = Some(2.1)
vr.ignore_filter :+= "HARD_TO_VALIDATE"
vr.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
vr.clusterFile = gvc.clusterFile
vr.jobOutputFile = swapExt(vr.clusterFile, ".cluster", ".vr.out")
vr.backOff = b
vr.jobOutputFile = new File(vr.clusterFile.stripSuffix(".cluster") + ".vr.out")
vr.backOff = Some(b)
vr.backOffFormat = "%.2f"
add(gvc, vr)
}
}
}
}
// Populate parameters passed in via -P
setParams
// Run the pipeline
run
// A very basic GATK UnifiedGenotyper
class GenerateVariantClusters extends GatkFunction {
var maxGaussians: Int = _
var shrinkage: Double = _
var dirichlet: Double = _
@Output
var clusterFile: File = _
def commandLine = gatkCommandLine("GenerateVariantClusters") +
"-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " +
"-l INFO -L 20 -an QD -an SB -an HaplotypeScore -an HRun " +
"-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " +
"-mG %d ".format(maxGaussians) +
"-shrinkage %.6f ".format(shrinkage) +
"-dirichlet %.6f ".format(dirichlet) +
"-clusterFile %s".format(clusterFile)
}
// A basic GATK VariantFiltration
class VariantRecalibrator extends GatkFunction {
var backOff: Double = _
@Input
var clusterFile: File = _
def commandLine = gatkCommandLine("VariantRecalibrator") +
"-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " +
"-l INFO -L 20 -titv 2.1 " +
"--ignore_filter HARD_TO_VALIDATE " +
"-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " +
"-backOff %.2f ".format(backOff) +
"-clusterFile %s ".format(clusterFile) +
"-output %s".format(clusterFile)
}
}

View File

@ -1,7 +0,0 @@
gatkJar = /humgen/gsa-hpprojects/GATK/bin/current/GenomeAnalysisTK.jar
referenceFile = /path/to/reference.fasta
dbsnp = /path/to/dbsnp
intervals = /path/to/my.interval_list
jobNamePrefix = Q
memoryLimit = 2
gatkLoggingLevel = INFO

View File

@ -1,54 +0,0 @@
import org.broadinstitute.sting.queue.QScript._
setArgs(args)
for (bam <- inputs("bam")) {
val ug = new UnifiedGenotyper
val vf = new VariantFiltration
val ve = new GatkFunction {
@Input(doc="vcf") var vcfFile: File = _
@Output(doc="eval") var evalFile: File = _
def commandLine = gatkCommandLine("VariantEval") + "-B eval,VCF,%s -o %s".format(vcfFile, evalFile)
}
// Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh.
ug.scatterCount = 3
ug.bamFiles :+= bam
ug.vcfFile = swapExt(bam, "bam", "unfiltered.vcf")
vf.vcfInput = ug.vcfFile
vf.vcfOutput = swapExt(bam, "bam", "filtered.vcf")
ve.vcfFile = vf.vcfOutput
ve.evalFile = swapExt(bam, "bam", "eval")
add(ug, vf, ve)
}
setParams
run
class UnifiedGenotyper extends GatkFunction {
@Output(doc="vcf")
@Gather(classOf[SimpleTextGatherFunction])
var vcfFile: File = _
def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s".format(vcfFile)
}
class VariantFiltration extends GatkFunction {
@Input(doc="input vcf")
var vcfInput: File = _
@Input(doc="filter names")
var filterNames: List[String] = Nil
@Input(doc="filter expressions")
var filterExpressions: List[String] = Nil
@Output(doc="output vcf")
var vcfOutput: File = _
def commandLine = gatkCommandLine("VariantFiltration") + "%s%s -B variant,VCF,%s -o %s"
.format(repeat(" -filterName ", filterNames), repeat(" -filterExpression ", filterExpressions), vcfInput, vcfOutput)
}

View File

@ -1,105 +0,0 @@
package org.broadinstitute.sting.queue
import collection.mutable.ListBuffer
import collection.JavaConversions._
import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.utils.text.XReadLines
import java.io.{FileInputStream, File}
import java.util.Properties
class QArguments(args: Array[String]) {
var bsubAllJobs = false
var bsubWaitJobs = false
var dryRun = false
val scripts = new ListBuffer[String]
var inputPaths = List.empty[File]
var properties = Map.empty[String, String]
val userArgs = parseArgs(args)
private def parseArgs(args: Array[String]) = {
var filtered = new ListBuffer[String]
filtered.appendAll(args)
if (isFlagged(filtered, "-debug"))
Logging.setDebug
if (isFlagged(filtered, "-trace"))
Logging.setTrace
if (isFlagged(filtered, "-dry"))
dryRun = true
if (isFlagged(filtered, "-bsub"))
bsubAllJobs = true
if (isFlagged(filtered, "-bsubWait"))
bsubWaitJobs = true
for (arg <- getArgs(filtered, "-P"))
addProperties(arg)
for (arg <- getArgs(filtered, "-I"))
addFile(arg)
for (arg <- getArgs(filtered, "-S"))
scripts.append(arg)
List(filtered:_*)
}
private def isFlagged(filtered: ListBuffer[String], search: String) = {
var found = false
var index = 0
while (0 <= index && index < filtered.size) {
index = filtered.indexOf(search)
if (index >= 0) {
found = true
filtered.remove(index)
}
}
found
}
private def getArgs(filtered: ListBuffer[String], search: String) = {
var found = new ListBuffer[String]
var index = 0
while (0 <= index && index < filtered.size) {
index = filtered.indexOf(search)
if (index >= 0) {
found.append(filtered(index+1))
filtered.remove(index, 2)
}
}
found
}
def addProperties(arg: String) = {
var file = new File(arg)
if (arg.contains("=") && !file.exists) {
val tokens = arg.split("=", 2)
properties += tokens(0) -> tokens(1)
} else if (arg.endsWith(".properties")) {
if (!file.exists)
throw new QException("File not found: " + file.getAbsolutePath)
var props = new Properties
props.load(new FileInputStream(file))
for ((name, value) <- props)
properties += name -> value
} else {
throw new QException("Invalid property: " + arg)
}
}
def addFile(arg: String): Unit = {
var file = new File(arg)
inputPaths :+= file
if (arg.endsWith(".list"))
new XReadLines(file).iterator.foreach(addFile(_))
}
}
object QArguments {
def strip(filtered: ListBuffer[String], search: String) = {
var index = 0
while (0 <= index && index < filtered.size) {
index = filtered.indexOf(search)
if (index >= 0) {
filtered.remove(index, 2)
}
}
}
}

View File

@ -1,47 +1,115 @@
package org.broadinstitute.sting.queue
import tools.nsc.MainGenericRunner
import org.broadinstitute.sting.queue.util.ClasspathUtils
import collection.mutable.ListBuffer
import org.broadinstitute.sting.queue.util.Logging
import java.io.File
import java.util.Arrays
import org.broadinstitute.sting.queue.engine.QGraph
import org.broadinstitute.sting.commandline.{ClassType, Input, Argument, CommandLineProgram}
import org.broadinstitute.sting.queue.util.{Logging, ScalaCompoundArgumentTypeDescriptor}
object QCommandLine extends Application with Logging {
var usage = """usage: java -jar Queue.jar [-P name=value] [-P file.properties] [-I input.file] [-I input_files.list] [-bsub] [-bsubWait] [-dry] [-debug] -S pipeline.scala"""
/**
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
*/
class QCommandLine extends CommandLineProgram with Logging {
@Input(fullName="script", shortName="S", doc="QScript scala file", required=true)
@ClassType(classOf[File])
private var scripts = List.empty[File]
override def main(args: Array[String]) = {
val qArgs: QArguments = try {
new QArguments(args)
} catch {
case exception => {
println(exception)
println(usage)
System.exit(-1)
}
null
@Argument(fullName="bsub_all_jobs", shortName="bsub", doc="Use bsub to submit jobs", required=false)
private var bsubAllJobs = false
@Argument(fullName="bsub_wait_jobs", shortName="bsubWait", doc="Wait for bsub submitted jobs before exiting", required=false)
private var bsubWaitJobs = false
@Argument(fullName="run_scripts", shortName="run", doc="Run QScripts", required=false)
private var run = false
@Argument(fullName="dot_graph", shortName="dot", doc="Outputs the queue graph to a .dot file. See: http://en.wikipedia.org/wiki/DOT_language", required=false)
private var queueDot: File = _
/**
* Takes the QScripts passed in, runs their script() methods, retrieves their generated
* functions, and then builds and runs a QGraph based on the dependencies.
*/
def execute = {
val qGraph = new QGraph
qGraph.dryRun = !run
qGraph.bsubAllJobs = bsubAllJobs
qGraph.bsubWaitJobs = bsubWaitJobs
val scripts = qScriptManager.createScripts()
for (script <- scripts) {
logger.info("Scripting " + qScriptManager.getName(script.getClass.asSubclass(classOf[QScript])))
loadArgumentsIntoObject(script)
script.script
script.functions.foreach(qGraph.add(_))
logger.info("Added " + script.functions.size + " functions")
}
logger.debug("starting")
if (qArgs.scripts.size == 0) {
println("Error: Missing script")
println(usage)
System.exit(-1)
logger.info("Binding functions")
qGraph.fillIn
if (queueDot != null) {
logger.info("Generating " + queueDot)
qGraph.renderToDot(queueDot)
}
// NOTE: Something in MainGenericRunner is exiting the VM.
if (qArgs.scripts.size != 1) {
println("Error: Only one script can be run at a time")
println(usage)
System.exit(-1)
}
logger.info("Running generated graph")
qGraph.run
logger.info("Done")
0
}
val newArgs = new ListBuffer[String]
newArgs.appendAll(args)
QArguments.strip(newArgs, "-S")
newArgs.prepend("-nocompdaemon", "-classpath", ClasspathUtils.manifestAwareClassPath, qArgs.scripts.head)
MainGenericRunner.main(newArgs.toArray)
/**
* Returns true as QScripts are located and compiled.
* @return true
*/
override def canAddArgumentsDynamically = true
// NOTE: This line is not reached because the MainGenericRunner exits the VM.
logger.debug("exiting")
/**
* Returns the list of QScripts passed in via -S so that their
* arguments can be inspected before QScript.script is called.
* @return Array of QScripts passed in.
*/
override def getArgumentSources =
qScriptManager.getValues.asInstanceOf[Array[Class[_]]]
/**
* Returns the name of a QScript
* @return The name of a QScript
*/
override def getArgumentSourceName(source: Class[_]) =
qScriptManager.getName(source.asSubclass(classOf[QScript]))
/**
* Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections.
* @return a ScalaCompoundArgumentTypeDescriptor
*/
override def getArgumentTypeDescriptors =
Arrays.asList(new ScalaCompoundArgumentTypeDescriptor)
/**
* Loads the QScripts passed in and returns a new QScriptManager than can be used to create them.
*/
private lazy val qScriptManager = {
QScriptManager.loadScripts(scripts)
new QScriptManager
}
}
/**
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
*/
object QCommandLine {
/**
* Main.
* @param argv Arguments.
*/
def main(argv: Array[String]) {
try {
CommandLineProgram.start(new QCommandLine, argv);
if (CommandLineProgram.result != 0)
System.exit(CommandLineProgram.result);
} catch {
case e: Exception => CommandLineProgram.exitSystemWithError(e)
}
}
}

View File

@ -1,109 +1,41 @@
package org.broadinstitute.sting.queue
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.queue.engine.QGraph
import org.broadinstitute.sting.queue.util.Logging
/**
* Syntactic sugar for filling in a pipeline using a Scala script.
* Defines a Queue pipeline as a collection of CommandLineFunctions.
*/
object QScript {
trait QScript extends Logging {
// Type aliases so users don't have to import
type File = java.io.File
type Input = org.broadinstitute.sting.commandline.Input
type Output = org.broadinstitute.sting.commandline.Output
type Argument = org.broadinstitute.sting.commandline.Argument
type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection
type CommandLineFunction = org.broadinstitute.sting.queue.function.CommandLineFunction
type GatkFunction = org.broadinstitute.sting.queue.function.gatk.GatkFunction
type ScatterGatherableFunction = org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
type Scatter = org.broadinstitute.sting.queue.function.scattergather.Scatter
type Gather = org.broadinstitute.sting.queue.function.scattergather.Gather
type BamGatherFunction = org.broadinstitute.sting.queue.function.scattergather.BamGatherFunction
type SimpleTextGatherFunction = org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction
// The arguments for executing pipelines
private var qArgs: QArguments = _
// A default pipeline. Can also use multiple 'new Pipeline()'
private val pipeline = new Pipeline
/**
* Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method.
*/
def script: Unit
/**
* Initializes the QArguments and returns a list of the rest of the user args.
* The command line functions that will be executed for this QScript.
*/
def setArgs(params: Array[String]) = {
qArgs = new QArguments(params)
qArgs.userArgs
}
/**
* Returns a list of files that were specified with "-I <file>" on the command line
* or inside a .list file.
*/
def inputs(extension: String) = qArgs.inputPaths.filter(_.getName.endsWith(extension))
var functions = List.empty[CommandLineFunction]
/**
* Exchanges the extension on a file.
*/
def swapExt(file: File, oldExtension: String, newExtension: String) =
protected def swapExt(file: File, oldExtension: String, newExtension: String) =
new File(file.getName.stripSuffix(oldExtension) + newExtension)
/**
* Adds one or more command line functions for dispatch later during run()
* Adds one or more command line functions to be run.
*/
def add(functions: CommandLineFunction*) = pipeline.add(functions:_*)
/**
* Sets the @Input and @Output values for all the functions
*/
def setParams(): Unit = pipeline.setParams()
/**
* Sets the @Input and @Output values for a single function
*/
def setParams(function: CommandLineFunction): Unit = pipeline.setParams(function)
/**
* Executes functions that have been added to the pipeline.
*/
def run() = pipeline.run()
/**
* Encapsulates a set of functions to run together.
*/
protected class Pipeline {
private var functions = List.empty[CommandLineFunction]
/**
* Adds one or more command line functions for dispatch later during run()
*/
def add(functions: CommandLineFunction*) =
this.functions :::= List(functions:_*)
/**
* Sets the @Input and @Output values for all the functions
*/
def setParams(): Unit =
for (function <- functions) setParams(function)
/**
* Sets the @Input and @Output values for a single function
*/
def setParams(function: CommandLineFunction): Unit =
function.properties = qArgs.properties
/**
* Executes functions that have been added to the pipeline.
*/
def run() = {
val qGraph = new QGraph
qGraph.dryRun = qArgs.dryRun
qGraph.bsubAllJobs = qArgs.bsubAllJobs
qGraph.bsubWaitJobs = qArgs.bsubWaitJobs
qGraph.properties = qArgs.properties
for (function <- functions)
qGraph.add(function)
qGraph.fillIn
qGraph.run
qGraph.renderToDot(new File("queue.dot"))
}
}
def add(functions: CommandLineFunction*) = this.functions ++= List(functions:_*)
}

View File

@ -0,0 +1,163 @@
package org.broadinstitute.sting.queue
import org.broadinstitute.sting.utils.classloader.PluginManager
import scala.tools.nsc.{Global, Settings}
import scala.tools.nsc.io.PlainFile
import org.broadinstitute.sting.queue.util.{Logging, ClasspathUtils, IOUtils}
import collection.JavaConversions
import java.io.File
import scala.tools.nsc.reporters.AbstractReporter
import java.lang.String
import org.apache.log4j.Level
import scala.tools.nsc.util.{FakePos, NoPosition, Position}
/**
 * Plugin manager for QScripts which loads QScripts into the current class loader.
 */
class QScriptManager extends PluginManager[QScript](classOf[QScript], "QScript", "Script") with Logging {
  /**
   * Returns the list of QScripts classes found in the classpath.
   * @return QScripts classes found in the classpath.
   */
  def getValues = {
    if (logger.isTraceEnabled) {
      // Trace each discovered plugin individually. Previously the formatted
      // message was built and discarded inside foreach, while logger.trace
      // received the Unit result of foreach (logging "()" instead).
      JavaConversions.asMap(this.pluginsByName)
        .foreach{case (name, clazz) => logger.trace("Found QScript %s: %s".format(name, clazz))}
    }
    JavaConversions.asIterable(this.pluginsByName.values).toArray
  }

  /**
   * Creates the QScripts for all values found in the classpath.
   * @return QScripts found in the classpath.
   */
  def createScripts() = getValues.map(_.newInstance.asInstanceOf[QScript])
}
/**
 * Plugin manager for QScripts which loads QScripts into the current classloader.
 */
object QScriptManager extends Logging {
  /**
   * Compiles and loads the scripts in the files into the current classloader.
   * Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
   * @param scripts Scala classes to compile.
   * @throws QException if the compiler reports one or more errors.
   */
  def loadScripts(scripts: List[File]) {
    if (scripts.size > 0) {
      val settings = new Settings((error: String) => logger.error(error))
      val outdir = IOUtils.tempDir("Q-classes").getAbsoluteFile
      settings.outdir.value = outdir.getPath

      // Set the classpath to the current class path.
      ClasspathUtils.manifestAwareClassPath.foreach(path => settings.classpath.append(path.getPath))

      val reporter = new Log4JReporter(settings)
      val compiler = new Global(settings, reporter)
      val run = new compiler.Run

      logger.debug("Compiling %s QScript%s".format(scripts.size, plural(scripts.size)))
      logger.trace("Compilation directory: " + settings.outdir.value)
      run.compileFiles(scripts.map(new PlainFile(_)))

      reporter.printSummary()
      if (reporter.hasErrors) {
        val msg = "Compile failed with %d error%s".format(
          reporter.ERROR.count, plural(reporter.ERROR.count))
        throw new QException(msg)
      }
      else if (reporter.WARNING.count > 0)
        logger.warn("Compile succeeded with %d warning%s".format(
          reporter.WARNING.count, plural(reporter.WARNING.count)))
      else
        logger.debug("Compilation complete")

      // Add the new compilation output directory to the classpath.
      ClasspathUtils.addClasspath(outdir)
    }
  }

  /**
   * Returns the string "s" if x is greater than 1.
   * @param x Value to test.
   * @return "s" if x is greater than one else "".
   */
  private def plural(x: Int) = if (x > 1) "s" else ""

  /**
   * NSC (New Scala Compiler) reporter which logs to Log4J.
   * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala
   */
  private class Log4JReporter(val settings: Settings) extends AbstractReporter {
    /** Prompting is unsupported: Queue always compiles non-interactively. */
    def displayPrompt = throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.")

    /**
     * Displays the message at position with severity.
     * @param posIn Position of the event in the file that generated the message.
     * @param msg Message to display.
     * @param severity Severity of the event.
     */
    def display(posIn: Position, msg: String, severity: Severity) = {
      severity.count += 1
      // Map the compiler severity onto the matching Log4J level.
      val level = severity match {
        case INFO => Level.INFO
        case WARNING => Level.WARN
        case ERROR => Level.ERROR
      }

      val pos = if (posIn eq null) NoPosition
                else if (posIn.isDefined) posIn.inUltimateSource(posIn.source)
                else posIn

      pos match {
        case FakePos(fmsg) =>
          printMessage(level, fmsg+" "+msg)
        case NoPosition =>
          printMessage(level, msg)
        case _ =>
          // (An unused StringBuilder previously allocated here was removed.)
          val file = pos.source.file
          printMessage(level, file.name+":"+pos.line+": "+msg)
          printSourceLine(level, pos)
      }
    }

    /**
     * Prints a summary count of warnings and errors.
     */
    def printSummary() = {
      if (WARNING.count > 0)
        printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found")
      if (ERROR.count > 0)
        printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found")
    }

    /**
     * Prints the source code line of an event followed by a pointer within the line to the error.
     * @param level Severity level.
     * @param pos Position in the file of the event.
     */
    private def printSourceLine(level: Level, pos: Position) {
      printMessage(level, pos.lineContent.stripLineEnd)
      printColumnMarker(level, pos)
    }

    /**
     * Prints the column marker of the given position.
     * @param level Severity level.
     * @param pos Position in the file of the event.
     */
    private def printColumnMarker(level: Level, pos: Position) =
      if (pos.isDefined) { printMessage(level, " " * (pos.column - 1) + "^") }

    /**
     * Prints the message at the severity level.
     * @param level Severity level.
     * @param message Message content.
     */
    private def printMessage(level: Level, message: String) = {
      logger.log(level, message)
    }
  }
}

View File

@ -1,20 +0,0 @@
package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.util.{Logging, ProcessUtils}
import org.broadinstitute.sting.queue.function.CommandLineFunction
/**
* Runs jobs one at a time locally
*/
trait CommandLineRunner extends Logging {
def run(function: CommandLineFunction, qGraph: QGraph) = {
if (logger.isDebugEnabled) {
logger.debug(function.commandDirectory + " > " + function.commandLine)
} else {
logger.info(function.commandLine)
}
if (!qGraph.dryRun)
ProcessUtils.runCommandAndWait(function.commandLine, function.commandDirectory)
}
}

View File

@ -1,22 +1,38 @@
package org.broadinstitute.sting.queue.engine
import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.{DispatchFunction, QFunction}
import org.broadinstitute.sting.queue.function.{CommandLineFunction, QFunction}
import scala.collection.immutable.ListSet
/**
* Dispatches jobs to a compute cluster.
*/
trait DispatchJobRunner {
/** Type of the job. */
type DispatchJobType
private var dispatchJobs = Map.empty[DispatchFunction, DispatchJobType]
/** An internal cache of all the jobs that have run by command line function. */
private var dispatchJobs = Map.empty[CommandLineFunction, DispatchJobType]
/** An internal list of functions that have no other dependencies. */
private var waitJobsByGraph = Map.empty[QGraph, ListSet[DispatchJobType]]
/**
* Dispatches a function to the queue and returns immediately, unless the function is a DispatchWaitFunction
* in which case it waits for all other terminal functions to complete.
* @param function Command to run.
* @param qGraph graph that holds the job, and if this is a dry run.
*/
def dispatch(function: DispatchFunction, qGraph: QGraph)
def dispatch(function: CommandLineFunction, qGraph: QGraph)
protected def addJob(function: DispatchFunction, qGraph: QGraph,
dispatchJob: DispatchJobType, previousJobs: List[DispatchJobType]) = {
/**
* Adds the job to the internal cache of previous jobs and removes the previous jobs that
* the job was dependent on from the list of functions that have no dependencies.
* @param function CommandLineFunction to add to the list.
* @param qGraph Current qGraph being iterated over.
* @param dispatchJob The job that is being added to the cache.
* @param previousJobs The previous jobs that the job was dependent on.
*/
protected def addJob(function: CommandLineFunction, qGraph: QGraph,
dispatchJob: DispatchJobType, previousJobs: Iterable[DispatchJobType]) = {
dispatchJobs += function -> dispatchJob
var waitJobs = getWaitJobs(qGraph)
for (previousJob <- previousJobs)
@ -26,7 +42,10 @@ trait DispatchJobRunner {
}
/**
* Walks up the graph looking for the previous LsfJobs
* Walks up the graph looking for the previous LsfJobs.
* @param function Function to examine for a previous command line job.
* @param qGraph The graph that contains the jobs.
* @return A list of prior jobs.
*/
protected def previousJobs(function: QFunction, qGraph: QGraph) : List[DispatchJobType] = {
var previous = List.empty[DispatchJobType]
@ -36,10 +55,10 @@ trait DispatchJobRunner {
incomingEdge match {
// Stop recursing when we find a job along the edge and return its job id
case dispatchFunction: DispatchFunction => previous :+= dispatchJobs(dispatchFunction)
case dispatchFunction: CommandLineFunction => previous :+= dispatchJobs(dispatchFunction)
// For any other type of edge find the LSF jobs preceding the edge
case qFunction: QFunction => previous = previousJobs(qFunction, qGraph) ::: previous
case qFunction: QFunction => previous ++= previousJobs(qFunction, qGraph)
}
}
previous
@ -47,10 +66,25 @@ trait DispatchJobRunner {
/**
* Returns a set of jobs that have no following jobs in the graph.
* @param qGraph The graph that contains the jobs.
* @return ListSet[DispatchJobType] of previous jobs that have no dependent jobs.
*/
protected def getWaitJobs(qGraph: QGraph) = {
if (!waitJobsByGraph.contains(qGraph))
waitJobsByGraph += qGraph -> ListSet.empty[DispatchJobType]
waitJobsByGraph(qGraph)
}
/**
* Builds a command line that can be run to force an automount of the directories.
* @param function Function whose jobDirectories are examined.
* @return A "cd <dir_1> [&& cd <dir_n>]" command.
*/
protected def mountCommand(function: CommandLineFunction) = {
val dirs = function.jobDirectories
if (dirs.size > 0)
Some("\'" + dirs.mkString("cd ", " && cd ", "") + "\'")
else
None
}
}

View File

@ -1,55 +1,76 @@
package org.broadinstitute.sting.queue.engine
import collection.JavaConversions._
import edu.mit.broad.core.lsf.LocalLsfJob
import java.util.ArrayList
import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.function.{DispatchWaitFunction, DispatchFunction}
import org.broadinstitute.sting.queue.function.{CommandLineFunction, DispatchWaitFunction}
import org.broadinstitute.sting.queue.util.{IOUtils, LsfJob, Logging}
/**
* Runs jobs on an LSF compute cluster.
*/
trait LsfJobRunner extends DispatchJobRunner with Logging {
type DispatchJobType = LocalLsfJob
type DispatchJobType = LsfJob
def dispatch(function: DispatchFunction, qGraph: QGraph) = {
val job = new LocalLsfJob
job.setName(function.jobName)
job.setOutputFile(function.jobOutputFile)
job.setErrFile(function.jobErrorFile)
job.setWorkingDir(function.commandDirectory)
job.setProject(function.jobProject)
job.setQueue(function.jobQueue)
job.setCommand(function.commandLine)
/**
* Dispatches the function on the LSF cluster.
* @param function Command to run.
* @param qGraph graph that holds the job, and if this is a dry run.
*/
def dispatch(function: CommandLineFunction, qGraph: QGraph) = {
val job = new LsfJob
job.name = function.jobName
job.outputFile = function.jobOutputFile
job.errorFile = function.jobErrorFile
job.project = function.jobProject
job.queue = function.jobQueue
job.command = function.commandLine
var extraArgs = List("-r")
if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory))
job.workingDir = function.commandDirectory
if (function.jobRestartable)
job.extraBsubArgs :+= "-r"
if (function.memoryLimit.isDefined)
extraArgs :::= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
val previous =
val previous: Iterable[LsfJob] =
if (function.isInstanceOf[DispatchWaitFunction]) {
extraArgs :+= "-K"
getWaitJobs(qGraph).toList
job.waitForCompletion = true
getWaitJobs(qGraph)
} else {
previousJobs(function, qGraph)
}
if (previous.size > 0)
extraArgs :::= List("-w", dependencyExpression(previous))
mountCommand(function) match {
case Some(command) => job.preExecCommand = command
case None => /* ignore */
}
job.setExtraBsubArgs(new ArrayList(extraArgs))
if (previous.size > 0)
job.extraBsubArgs ++= List("-w", dependencyExpression(previous, function.jobRunOnlyIfPreviousSucceed))
addJob(function, qGraph, job, previous)
if (logger.isDebugEnabled) {
logger.debug(function.commandDirectory + " > " + job.getBsubCommand.mkString(" "))
logger.debug(function.commandDirectory + " > " + job.bsubCommand.mkString(" "))
} else {
logger.info(job.getBsubCommand.mkString(" "))
logger.info(job.bsubCommand.mkString(" "))
}
if (!qGraph.dryRun)
job.start
job.run
}
private def dependencyExpression(jobs: List[LocalLsfJob]) = {
jobs.toSet[LocalLsfJob].map(_.getName).mkString("ended(\"", "\") && ended(\"", "\")")
/**
* Returns the dependency expression for the prior jobs.
* @param jobs Previous jobs this job is dependent on.
* @param runOnSuccess Run the job only if the previous jobs succeed.
* @return The dependency expression for the prior jobs.
*/
private def dependencyExpression(jobs: Iterable[LsfJob], runOnSuccess: Boolean) = {
val jobNames = jobs.toSet[LsfJob].map(_.name)
if (runOnSuccess)
jobNames.mkString("done(\"", "\") && done(\"", "\")")
else
jobNames.mkString("ended(\"", "\") && ended(\"", "\")")
}
}

View File

@ -6,22 +6,27 @@ import scala.collection.JavaConversions
import scala.collection.JavaConversions._
import org.broadinstitute.sting.queue.function.{MappingFunction, CommandLineFunction, QFunction}
import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
import org.broadinstitute.sting.queue.util.{CollectionUtils, Logging}
import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.QException
import org.jgrapht.alg.CycleDetector
import org.jgrapht.EdgeFactory
import org.jgrapht.ext.DOTExporter
import org.broadinstitute.sting.queue.function.DispatchFunction
import org.broadinstitute.sting.queue.function.gatk.GatkFunction
import java.io.File
/**
* The internal dependency tracker between sets of function input and output files.
*/
class QGraph extends Logging {
var dryRun = true
var bsubAllJobs = false
var bsubWaitJobs = false
var properties = Map.empty[String, String]
val jobGraph = newGraph
def numJobs = JavaConversions.asSet(jobGraph.edgeSet).filter(_.isInstanceOf[CommandLineFunction]).size
/**
* Adds a QScript created CommandLineFunction to the graph.
* @param command Function to add to the graph.
*/
def add(command: CommandLineFunction) {
addFunction(command)
}
@ -49,22 +54,30 @@ class QGraph extends Logging {
jobGraph.removeAllVertices(jobGraph.vertexSet.filter(isOrphan(_)))
}
/**
* Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph.
*/
def run = {
var isReady = true
var totalMissingValues = 0
for (function <- JavaConversions.asSet(jobGraph.edgeSet)) {
function match {
case cmd: CommandLineFunction =>
val missingValues = cmd.missingValues
if (missingValues.size > 0) {
isReady = false
logger.error("Missing values for function: %s".format(cmd.commandLine))
for (missing <- missingValues)
val missingFieldValues = cmd.missingFields
if (missingFieldValues.size > 0) {
totalMissingValues += missingFieldValues.size
logger.error("Missing %s values for function: %s".format(missingFieldValues.size, cmd.commandLine))
for (missing <- missingFieldValues)
logger.error(" " + missing)
}
case _ =>
}
}
if (totalMissingValues > 0) {
isReady = false
}
val detector = new CycleDetector(jobGraph)
if (detector.detectCycles) {
logger.error("Cycles were detected in the graph:")
@ -75,11 +88,29 @@ class QGraph extends Logging {
if (isReady || this.dryRun)
(new TopologicalJobScheduler(this) with LsfJobRunner).runJobs
if (totalMissingValues > 0) {
logger.error("Total missing values: " + totalMissingValues)
}
if (isReady && this.dryRun) {
logger.info("Dry run completed successfully!")
logger.info("Re-run with \"-run\" to execute the functions.")
}
}
/**
* Creates a new graph where if new edges are needed (for cyclic dependency checking) they can be automatically created using a generic MappingFunction.
* @return A new graph
*/
private def newGraph = new SimpleDirectedGraph[QNode, QFunction](new EdgeFactory[QNode, QFunction] {
def createEdge(input: QNode, output: QNode) = new MappingFunction(input.items, output.items)})
def createEdge(input: QNode, output: QNode) = new MappingFunction(input.files, output.files)})
/**
* Adds a generic QFunction to the graph.
* If the function is scatterable and the jobs request bsub, splits the job into parts and adds the parts instead.
* @param f Generic QFunction to add to the graph.
*/
private def addFunction(f: QFunction): Unit = {
try {
f.freeze
@ -113,31 +144,53 @@ class QGraph extends Logging {
}
}
private def addCollectionInputs(value: Any): Unit = {
CollectionUtils.foreach(value, (item, collection) =>
addMappingEdge(item, collection))
/**
* Checks to see if the set of files has more than one file and if so adds input mappings between the set and the individual files.
* @param files Set to check.
*/
private def addCollectionInputs(files: Set[File]): Unit = {
if (files.size > 1)
for (file <- files)
addMappingEdge(Set(file), files)
}
private def addCollectionOutputs(value: Any): Unit = {
CollectionUtils.foreach(value, (item, collection) =>
addMappingEdge(collection, item))
/**
* Checks to see if the set of files has more than one file and if so adds output mappings between the individual files and the set.
* @param files Set to check.
*/
private def addCollectionOutputs(files: Set[File]): Unit = {
if (files.size > 1)
for (file <- files)
addMappingEdge(files, Set(file))
}
private def addMappingEdge(input: Any, output: Any) = {
val inputSet = asSet(input)
val outputSet = asSet(output)
val hasEdge = inputSet == outputSet ||
jobGraph.getEdge(QNode(inputSet), QNode(outputSet)) != null ||
jobGraph.getEdge(QNode(outputSet), QNode(inputSet)) != null
/**
* Adds a directed graph edge between the input set and the output set if there isn't a direct relationship between the two nodes already.
* @param input Input set of files.
* @param output Output set of files.
*/
private def addMappingEdge(input: Set[File], output: Set[File]) = {
val hasEdge = input == output ||
jobGraph.getEdge(QNode(input), QNode(output)) != null ||
jobGraph.getEdge(QNode(output), QNode(input)) != null
if (!hasEdge)
addFunction(new MappingFunction(inputSet, outputSet))
addFunction(new MappingFunction(input, output))
}
private def asSet(value: Any): Set[Any] = if (value.isInstanceOf[Set[_]]) value.asInstanceOf[Set[Any]] else Set(value)
/**
* Returns true if the edge is an internal mapping edge.
* @param edge Edge to check.
* @return true if the edge is an internal mapping edge.
*/
private def isMappingEdge(edge: QFunction) =
edge.isInstanceOf[MappingFunction]
/**
* Returns true if the edge is mapping edge that is not needed because it does
* not direct input or output from a user generated CommandLineFunction.
* @param edge Edge to check.
* @return true if the edge is not needed in the graph.
*/
private def isFiller(edge: QFunction) = {
if (isMappingEdge(edge)) {
if (jobGraph.outgoingEdgesOf(jobGraph.getEdgeTarget(edge)).size == 0)
@ -148,9 +201,19 @@ class QGraph extends Logging {
} else false
}
/**
* Returns true if the node is not connected to any edges.
* @param node Node (set of files) to check
* @return true if this set of files is not needed in the graph.
*/
private def isOrphan(node: QNode) =
(jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0
/**
* Outputs the graph to a .dot file.
* http://en.wikipedia.org/wiki/DOT_language
* @param file Path to output the .dot file.
*/
def renderToDot(file: java.io.File) = {
val out = new java.io.FileWriter(file)

View File

@ -1,6 +1,9 @@
package org.broadinstitute.sting.queue.engine
import java.io.File
/**
* Represents a state between QFunctions in the directed acyclic QGraph
* @param files The set of files that represent this node state.
*/
case class QNode (val items: Set[Any])
case class QNode (val files: Set[File])

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.util.{Logging, ShellJob}
import org.broadinstitute.sting.queue.function.CommandLineFunction
/**
 * Runs jobs one at a time locally
 */
trait ShellJobRunner extends Logging {
  /**
   * Runs the function on the local shell.
   * @param function Command to run.
   * @param qGraph graph that holds the job, and if this is a dry run.
   */
  def run(function: CommandLineFunction, qGraph: QGraph) = {
    // Build the local shell job from the function's command line and file settings.
    val shellJob = new ShellJob
    shellJob.command = function.commandLine
    shellJob.workingDir = function.commandDirectory
    shellJob.outputFile = function.jobOutputFile
    shellJob.errorFile = function.jobErrorFile

    // At debug level include the working directory; otherwise log just the command.
    if (logger.isDebugEnabled)
      logger.debug(function.commandDirectory + " > " + function.commandLine)
    else
      logger.info(function.commandLine)

    // Only execute when this is not a dry run.
    if (!qGraph.dryRun)
      shellJob.run
  }
}

View File

@ -7,21 +7,29 @@ import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.function._
/**
* Loops over the job graph running jobs as the edges are traversed
* Loops over the job graph running jobs as the edges are traversed.
* @param qGraph The graph that contains the jobs to be run.
*/
abstract class TopologicalJobScheduler(private val qGraph: QGraph)
extends CommandLineRunner with DispatchJobRunner with Logging {
extends ShellJobRunner with DispatchJobRunner with Logging {
protected val iterator = new TopologicalOrderIterator(qGraph.jobGraph)
iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QFunction] {
/**
* As each edge is traversed, either dispatch the job or run it locally.
* @param event Event holding the edge that was passed.
*/
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QFunction]) = event.getEdge match {
case f: DispatchFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph)
case f: CommandLineFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph)
case f: CommandLineFunction => run(f, qGraph)
case f: MappingFunction => /* do nothing for mapping functions */
}
})
/**
* Runs the jobs by traversing the graph.
*/
def runJobs = {
logger.info("Number of jobs: %s".format(qGraph.numJobs))
if (logger.isTraceEnabled)
@ -39,7 +47,6 @@ abstract class TopologicalJobScheduler(private val qGraph: QGraph)
if (qGraph.bsubAllJobs && qGraph.bsubWaitJobs) {
logger.info("Waiting for jobs to complete.")
val wait = new DispatchWaitFunction
wait.properties = qGraph.properties
wait.freeze
dispatch(wait, qGraph)
}

View File

@ -0,0 +1,17 @@
package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.queue.function.JarCommandLineFunction
import org.broadinstitute.sting.commandline.Argument
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
/**
 * Merges BAM files using Picard's MergeSamFiles.jar.
 * At the Broad the jar can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the Broad see http://picard.sourceforge.net/
 */
class BamGatherFunction extends GatherFunction with JarCommandLineFunction {
  // Passed to Picard as COMPRESSION_LEVEL when set; omitted from the command line when None.
  @Argument(doc="Compression level 1-9", required=false)
  var compressionLevel: Option[Int] = None

  // Appends to the base jar invocation: the optional compression level, fixed merge
  // settings with the gathered OUTPUT file, and one INPUT= argument per scattered part.
  override def commandLine = super.commandLine + "%s%s%s".format(
    optional(" COMPRESSION_LEVEL=", compressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
}

View File

@ -0,0 +1,35 @@
package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.queue.function.CommandLineFunction
import java.io.File
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
/**
 * Indexes a BAM file.
 * By default uses samtools index.
 * The syntax of the script must be:
 *   <bamIndexScript> <bam_file> <bam_index_file>
 */
class BamIndexFunction extends CommandLineFunction {
  // Command used to build the index; defaults to "samtools index" but may be swapped out.
  @Argument(doc="BAM file script")
  var bamIndexScript: String = "samtools index"

  @Input(doc="BAM file to index")
  var bamFile: File = _

  // Defaulted in freezeFieldValues when left unset.
  @Output(doc="BAM file index to output", required=false)
  var bamFileIndex: File = _

  /**
   * Sets the bam file index to the bam file name + ".bai".
   */
  override def freezeFieldValues = {
    super.freezeFieldValues
    if (bamFileIndex == null && bamFile != null)
      bamFileIndex = new File(bamFile.getPath + ".bai")
  }

  def commandLine = "%s %s %s".format(bamIndexScript, bamFile, bamFileIndex)

  // Short label for this node when the job graph is rendered to a .dot file.
  override def dotString = "Index: %s".format(bamFile.getName)
}

View File

@ -0,0 +1,8 @@
package org.broadinstitute.sting.queue.extensions.gatk
/**
 * Splits intervals by contig instead of evenly.
 */
class ContigScatterFunction extends IntervalScatterFunction {
  // Replaces the parent's default split script with the per-contig splitter.
  splitIntervalsScript = "splitIntervalsByContig.py"
}

View File

@ -0,0 +1,16 @@
package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.commandline.Argument
import org.broadinstitute.sting.queue.function.scattergather.ScatterFunction
/**
 * An interval scatter function that allows the script to be swapped out.
 * The syntax of the script must be:
 *   <splitIntervalsScript> <intervals_file> <split_intervals_1> [.. <split_intervals_n>]
 */
class IntervalScatterFunction extends ScatterFunction {
  // Script invoked to perform the split; subclasses may override (see ContigScatterFunction).
  @Argument(doc="Interval split script")
  var splitIntervalsScript: String = "splitIntervals.sh"

  // Builds: <script> <original intervals file> <one output path per scattered part>.
  def commandLine = "%s %s%s".format(splitIntervalsScript, originalInput, repeat(" ", scatterParts))
}

View File

@ -0,0 +1,14 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
/**
 * Used to provide -B rodBinding arguments to the GATK.
 */
case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider {
  // All three components are mandatory; fail fast at construction time.
  require(trackName != null, "RodBind trackName cannot be null")
  require(trackType != null, "RodBind trackType cannot be null")
  require(file != null, "RodBind file cannot be null")

  // Rendered in the comma-separated form the GATK -B argument expects: <name>,<type>,<file>.
  override def toString = "%s,%s,%s".format(trackName, trackType, file)
}

View File

@ -1,74 +1,402 @@
package org.broadinstitute.sting.queue.function
import org.broadinstitute.sting.queue.util._
import java.lang.reflect.Field
import java.lang.annotation.Annotation
import org.broadinstitute.sting.commandline.{Input, Output}
import org.broadinstitute.sting.commandline._
import java.io.File
import collection.JavaConversions._
import org.broadinstitute.sting.queue.function.scattergather.{SimpleTextGatherFunction, Gather}
import java.lang.management.ManagementFactory
import org.broadinstitute.sting.queue.QException
trait CommandLineFunction extends InputOutputFunction with DispatchFunction {
var properties = Map.empty[String, String]
/**
* A command line that will be run in a pipeline.
*/
trait CommandLineFunction extends QFunction with Logging {
def commandLine: String
def inputFieldsWithValues = inputFields.filter(hasFieldValue(_))
def outputFieldsWithValues = outputFields.filter(hasFieldValue(_))
/** Upper memory limit */
var memoryLimit: Option[Int] = None
/** Whether a job is restartable */
var jobRestartable = true
/** Directory to run the command in. */
var commandDirectory: File = IOUtils.CURRENT_DIR
/** Prefix for automatic job name creation */
var jobNamePrefix: String = CommandLineFunction.processNamePrefix
/** The name name of the job */
var jobName: String = _
/** Job project to run the command */
var jobProject = "Queue"
/** Job queue to run the command */
var jobQueue = "broad"
/** Temporary directory to write any files */
var jobTempDir: File = new File(System.getProperty("java.io.tmpdir"))
/** If true this function will run only if the jobs it is dependent on succeed. */
var jobRunOnlyIfPreviousSucceed = true
/** File to redirect any output. Defaults to <jobName>.out */
@Output(doc="File to redirect any output", required=false)
@Gather(classOf[SimpleTextGatherFunction])
var jobOutputFile: File = _
/** File to redirect any errors. Defaults to <jobName>.out */
@Output(doc="File to redirect any errors", required=false)
@Gather(classOf[SimpleTextGatherFunction])
var jobErrorFile: File = _
/** The complete list of fields on this CommandLineFunction. */
lazy val functionFields: List[ArgumentSource] = ParsingEngine.extractArgumentSources(this.getClass).toList
/** The @Input fields on this CommandLineFunction. */
lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input]))
/** The @Output fields on this CommandLineFunction. */
lazy val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output]))
/** The @Argument fields on this CommandLineFunction. */
lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument]))
/**
* Sets parameters from the arg map.
* Returns set of directories required to run the command.
* @return Set of directories required to run the command.
*/
override def freeze = {
for ((name, value) <- properties) addOrUpdateWithStringValue(name, value)
def jobDirectories = {
var dirs = Set.empty[File]
dirs += commandDirectory
if (jobTempDir != null)
dirs += jobTempDir
dirs ++= inputs.map(_.getParentFile)
dirs ++= outputs.map(_.getParentFile)
dirs
}
/**
* Returns the input files for this function.
* @return Set[File] inputs for this function.
*/
def inputs = getFieldFiles(inputFields)
/**
* Returns the output files for this function.
* @return Set[File] outputs for this function.
*/
def outputs = getFieldFiles(outputFields)
/**
* Gets the files from the fields. The fields must be a File, a FileProvider, or a List or Set of either.
* @param fields Fields to get files.
* @return Set[File] for the fields.
*/
private def getFieldFiles(fields: List[ArgumentSource]): Set[File] = {
var files = Set.empty[File]
for (field <- fields)
files ++= getFieldFiles(field)
files
}
/**
* Gets the files from the field. The field must be a File, a FileProvider, or a List or Set of either.
* @param fields Field to get files.
* @return Set[File] for the field.
*/
def getFieldFiles(field: ArgumentSource): Set[File] = {
var files = Set.empty[File]
CollectionUtils.foreach(getFieldValue(field), (fieldValue) => {
val file = fieldValueToFile(field, fieldValue)
if (file != null)
files += file
})
files
}
/**
* Gets the file from the field. The field must be a File or a FileProvider and not a List or Set.
* @param field Field to get the file.
* @return File for the field.
*/
def getFieldFile(field: ArgumentSource): File =
fieldValueToFile(field, getFieldValue(field))
/**
* Converts the field value to a file. The field must be a File or a FileProvider.
* @param field Field to get the file.
* @param value Value of the File or FileProvider or null.
* @return Null if value is null, otherwise the File.
* @throws QException if the value is not a File or FileProvider.
*/
private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match {
case file: File => file
case fileProvider: FileProvider => fileProvider.file
case null => null
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or implement FileProvider: %s: %s".format(field.field, unknown))
}
/**
* Resets the field to the temporary directory.
* @param field Field to get and set the file.
* @param tempDir new root for the file.
*/
def resetFieldFile(field: ArgumentSource, tempDir: File): File = {
getFieldValue(field) match {
case file: File => {
val newFile = IOUtils.resetParent(tempDir, file)
setFieldValue(field, newFile)
newFile
}
case fileProvider: FileProvider => {
fileProvider.file = IOUtils.resetParent(tempDir, fileProvider.file)
fileProvider.file
}
case null => null
case unknown =>
throw new QException("Unable to set file from %s: %s".format(field, unknown))
}
}
/**
* The function description in .dot files
*/
override def dotString = jobName + " => " + commandLine
/**
* Sets all field values and makes them canonical so that the graph can
* match the inputs of one function to the output of another using equals().
*/
final override def freeze = {
freezeFieldValues
canonFieldValues
super.freeze
}
/**
* Sets all field values.
*/
def freezeFieldValues = {
if (jobName == null)
jobName = CommandLineFunction.nextJobName(jobNamePrefix)
if (jobOutputFile == null)
jobOutputFile = new File(jobName + ".out")
commandDirectory = IOUtils.subDir(IOUtils.CURRENT_DIR, commandDirectory)
}
/**
 * Makes all field values canonical so that the graph can match the
 * inputs of one function to the output of another using equals().
 */
def canonFieldValues = {
  this.functionFields.foreach { field =>
    // Apply canon recursively through collections via CollectionUtils.updated.
    val canonical = CollectionUtils.updated(this.getFieldValue(field), canon).asInstanceOf[AnyRef]
    this.setFieldValue(field, canonical)
  }
}
/**
 * Set value to a uniform value across functions.
 * Base implementation changes any relative path to an absolute path.
 * @param value to be updated
 * @return the modified value, or a copy if the value is immutable
 */
protected def canon(value: Any) = value match {
  case f: File => absolute(f)
  case provider: FileProvider =>
    provider.file = absolute(provider.file)
    provider
  case other => other
}
/**
 * Returns the absolute path to the file relative to the job command directory.
 * @param file File to root relative to the command directory if it is not already absolute.
 * @return The absolute path to file.
 */
// NOTE(review): delegates to IOUtils.subDir, which presumably leaves already-absolute paths untouched — confirm.
private def absolute(file: File) = IOUtils.subDir(commandDirectory, file)
/**
 * Repeats parameters with a prefix/suffix if they are set otherwise returns "".
 * Skips null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString.
 * @param prefix Command line prefix per parameter.
 * @param params Traversable parameters.
 * @param suffix Optional suffix per parameter.
 * @param separator Optional separator per parameter.
 * @param format Format string if the value has a value
 * @return The generated string
 */
// NOTE(review): two overloads that both declare default arguments cannot coexist in Scala.
// These look like the pre- and post-refactor variants of the same method captured by a diff —
// confirm which one (presumably the Traversable/format version) is current.
protected def repeat(prefix: String, params: Seq[_], suffix: String = "", separator: String = "") =
params.filter(param => hasValue(param)).map(param => prefix + toValue(param) + suffix).mkString(separator)
protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", format: String = "%s") =
params.filter(param => hasValue(param)).map(param => prefix + toValue(param, format) + suffix).mkString(separator)
/**
 * Returns parameter with a prefix/suffix if it is set otherwise returns "".
 * Does not output null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString.
 * @param prefix Command line prefix per parameter.
 * @param param Parameters to check for a value.
 * @param suffix Optional suffix per parameter.
 * @param format Format string if the value has a value
 * @return The generated string
 */
// NOTE(review): as with repeat above in the original file history, these two overloads with
// default arguments cannot coexist in Scala; they appear to be pre/post diff variants — confirm.
protected def optional(prefix: String, param: Any, suffix: String = "") =
if (hasValue(param)) prefix + toValue(param) + suffix else ""
protected def optional(prefix: String, param: Any, suffix: String = "", format: String = "%s") =
if (hasValue(param)) prefix + toValue(param, format) + suffix else ""
def missingValues = {
/**
* Returns fields that do not have values which are required.
* @return List[String] names of fields missing values.
*/
def missingFields: List[String] = {
val missingInputs = missingFields(inputFields, classOf[Input])
val missingOutputs = missingFields(outputFields, classOf[Output])
missingInputs | missingOutputs
val missingArguments = missingFields(argumentFields, classOf[Argument])
(missingInputs | missingOutputs | missingArguments).toList.sorted
}
private def missingFields(fields: List[Field], annotation: Class[_ <: Annotation]) = {
/**
* Returns fields that do not have values which are required.
* @param sources Fields to check.
* @param annotation Annotation.
* @return Set[String] names of fields missing values.
*/
private def missingFields(sources: List[ArgumentSource], annotation: Class[_ <: Annotation]): Set[String] = {
var missing = Set.empty[String]
for (field <- fields) {
if (isRequired(field, annotation))
if (!hasValue(ReflectionUtils.getValue(this, field)))
missing += field.getName
for (source <- sources) {
if (isRequired(source, annotation))
if (!hasFieldValue(source))
if (!exclusiveOf(source, annotation).exists(otherSource => hasFieldValue(otherSource)))
missing += "@%s: %s - %s".format(annotation.getSimpleName, source.field.getName, doc(source, annotation))
}
missing
}
private def isRequired(field: Field, annotationClass: Class[_ <: Annotation]) =
getAnnotationValue(field.getAnnotation(annotationClass), "required").asInstanceOf[Boolean]
private def getAnnotationValue(annotation: Annotation, method: String) =
annotation.getClass.getMethod(method).invoke(annotation)
protected def hasFieldValue(field: Field) = hasValue(this.getFieldValue(field))
private def hasValue(param: Any) = param match {
case null => false
case Nil => false
case None => false
case _ => true
/**
 * Scala sugar type for checking annotation required and exclusiveOf.
 */
// Structural type matching the @Input/@Output/@Argument annotations, so their members
// can be read via asInstanceOf without referencing each concrete annotation class.
private type ArgumentAnnotation = {
/**
 * Returns true if the field is required.
 * @return true if the field is required.
 */
def required(): Boolean
/**
 * Returns the comma separated list of fields that may be set instead of this field.
 * @return the comma separated list of fields that may be set instead of this field.
 */
def exclusiveOf(): String
/**
 * Returns the documentation for this field.
 * @return the documentation for this field.
 */
def doc(): String
}
private def toValue(param: Any): String = param match {
case null => ""
case Nil => ""
case None => ""
case Some(x) => x.toString
case x => x.toString
/**
 * Returns the isRequired value from the field.
 * @param field Field to check.
 * @param annotation Annotation.
 * @return the isRequired value from the field annotation.
 */
// Reads required() through the structural ArgumentAnnotation type (reflective call).
private def isRequired(field: ArgumentSource, annotation: Class[_ <: Annotation]) =
ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].required
/**
 * Returns an array of ArgumentSources from functionFields listed in the exclusiveOf of the original field
 * @param field Field to check.
 * @param annotation Annotation.
 * @return the Array[ArgumentSource] that may be set instead of the field.
 */
private def exclusiveOf(field: ArgumentSource, annotation: Class[_ <: Annotation]) = {
  // Parse the comma separated exclusiveOf() list, dropping blank entries.
  val names = ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].exclusiveOf
    .split(",").map(_.trim).filter(!_.isEmpty)
  // Resolve each name to its ArgumentSource, failing loudly on unknown names.
  names.map { fieldName =>
    functionFields.find(_.field.getName == fieldName) match {
      case Some(source) => source
      case None => throw new QException("Unable to find exclusion field %s on %s".format(fieldName, this.getClass.getSimpleName))
    }
  }
}
/**
 * Returns the doc value from the field.
 * @param field Field to check.
 * @param annotation Annotation.
 * @return the doc value from the field annotation.
 */
// Reads doc() through the structural ArgumentAnnotation type (reflective call).
private def doc(field: ArgumentSource, annotation: Class[_ <: Annotation]) =
ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].doc
/**
 * Returns true if the field has a value.
 * @param source Field to check for a value.
 * @return true if the field has a value.
 */
// Delegates to hasValue, so empty collections count as "no value".
protected def hasFieldValue(source: ArgumentSource) = this.hasValue(this.getFieldValue(source))
/**
 * Returns false if the value is null or an empty collection.
 * @param param Value to test for null, or a collection to test if it is empty.
 * @return false if the value is null, or false if the collection is empty, otherwise true.
 */
private def hasValue(param: Any) = CollectionUtils.isNotNullOrNotEmpty(param)
/**
 * Returns "" if the value is null or an empty collection, otherwise the formatted value.
 * @param param Value to test for null, or a collection to test if it is empty.
 * @param format Format string if the value has a value
 * @return "" if the value is null or the collection is empty, otherwise the formatted value.
 */
private def toValue(param: Any, format: String): String =
  if (CollectionUtils.isNullOrEmpty(param)) {
    ""
  } else {
    // Unwrap Some(x) to x; everything else is formatted directly.
    val unwrapped = param match {
      case Some(inner) => inner
      case other => other
    }
    format.format(unwrapped)
  }
/**
 * Gets the value of a field.
 * @param source Field to get the value for.
 * @return value of the field.
 */
// Resolves the owning object first via invokeObj to support nested argument collections.
def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field)
/**
 * Sets the value of a field.
 * @param source Field to set the value for.
 * @param value New value of the field.
 */
def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value)
/**
 * Walks the fields in this object, or any collections within it,
 * recursively to find the object holding the field to be retrieved or set.
 * @param source Field to find the invoke object for.
 * @return Object to invoke the field on.
 */
private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _))
}
/**
 * A command line that will be run in a pipeline.
 */
object CommandLineFunction {
  /** A semi-unique job prefix using the host name and the process id. */
  private val processNamePrefix = "Q-" + {
    // RuntimeMXBean.getName is typically "<pid>@<host>"; keep everything before
    // the first "." (or the whole name when there is no dot).
    val name = ManagementFactory.getRuntimeMXBean.getName
    val dot = name.indexOf(".")
    if (dot < 0) name else name.substring(0, dot)
  }

  /** Job index counter for this run of Queue. */
  private var jobIndex = 0

  /**
   * Returns the next job name using the prefix.
   * @param prefix Prefix of the job name.
   * @return the next job name.
   */
  private def nextJobName(prefix: String) = {
    jobIndex += 1
    prefix + "-" + jobIndex
  }
}

View File

@ -1,93 +0,0 @@
package org.broadinstitute.sting.queue.function
import java.io.File
import java.lang.management.ManagementFactory
import org.broadinstitute.sting.queue.function.scattergather.{Gather, SimpleTextGatherFunction}
import org.broadinstitute.sting.queue.util.IOUtils
import org.broadinstitute.sting.commandline.{ClassType, Output, Input}
/**
 * Base trait for commands dispatched to a job scheduler: adds a memory limit,
 * job naming, output/error redirection, and project/queue settings on top of
 * InputOutputFunction. (NOTE(review): scheduler is presumably LSF/bsub — confirm.)
 */
trait DispatchFunction extends InputOutputFunction {
// Implementors supply the full command line to run.
def commandLine: String
@Input(doc="Upper memory limit", required=false)
@ClassType(classOf[Int])
var memoryLimit: Option[Int] = None
/**
 * The directory where the command should run.
 */
@Input(doc="Directory to write any files", required=false)
var commandDirectory: File = IOUtils.CURRENT_DIR
@Input(doc="Prefix for automatic job name creation", required=false)
var jobNamePrefix: String = _
@Input(doc="Job name to run on the farm", required=false)
var jobName: String = _
@Output(doc="File to redirect any output", required=false)
@Gather(classOf[SimpleTextGatherFunction])
var jobOutputFile: File = _
@Output(doc="File to redirect any errors", required=false)
@Gather(classOf[SimpleTextGatherFunction])
var jobErrorFile: File = _
@Input(doc="Job project to run the command", required=false)
var jobProject = "Queue"
@Input(doc="Job queue to run the command", required=false)
var jobQueue = "broad"
/**
 * Fills in defaults before freezing. jobName must be assigned before the
 * output/error file defaults, which derive their names from it.
 */
override def freeze = {
if (jobNamePrefix == null)
jobNamePrefix = DispatchFunction.processNamePrefix
if (jobName == null)
jobName = DispatchFunction.nextJobName(jobNamePrefix)
if (jobOutputFile == null)
jobOutputFile = new File(jobName + ".out")
if (jobErrorFile == null)
jobErrorFile = new File(jobName + ".err")
commandDirectory = IOUtils.absolute(IOUtils.CURRENT_DIR, commandDirectory)
super.freeze
}
/** The function description in .dot files: "<jobName> => <commandLine>". */
override def dotString = jobName + " => " + commandLine
/**
 * Override the canon function to change any relative path to an absolute path.
 */
override protected def canon(value: Any) = {
value match {
case file: File => IOUtils.absolute(commandDirectory, file)
case x => super.canon(x)
}
}
// Roots a file under the command directory unless it is already absolute.
def absolute(file: File) = IOUtils.absolute(commandDirectory, file)
// Builds a job-specific temporary sub-directory path under the command directory.
def temp(subDir: String) = IOUtils.sub(commandDirectory, jobName + "-" + subDir)
override def toString = commandLine
}
object DispatchFunction {
/** A semi-unique job prefix built from the runtime MXBean name (typically "<pid>@<host>"),
 * truncated at the first "." */
private val processNamePrefix = "Q-" + {
var prefix = ManagementFactory.getRuntimeMXBean.getName
val index = prefix.indexOf(".")
if (index >= 0)
prefix = prefix.substring(0, index)
prefix
}
/** Job index counter for this run of Queue. */
private var jobIndex = 0
/** Returns the next job name: "<prefix>-<index>" with a monotonically increasing index. */
private def nextJobName(prefix: String) = {
jobIndex += 1
prefix + "-" + jobIndex
}
}

View File

@ -2,10 +2,14 @@ package org.broadinstitute.sting.queue.function
import java.io.File
/** An internal class that is used by bsub to wait on all other jobs before exiting. */
class DispatchWaitFunction extends CommandLineFunction {
/**
 * Returns the command line "echo".
 * @return echo
 */
def commandLine = "echo"
jobQueue = "short"
// NOTE(review): the temp files are created eagerly at construction time — confirm
// this side effect is intended and that the files are cleaned up elsewhere.
jobOutputFile = File.createTempFile("Q-wait", ".out")
jobErrorFile = File.createTempFile("Q-wait", ".err")
}

View File

@ -0,0 +1,11 @@
package org.broadinstitute.sting.queue.function
import java.io.File
/**
 * A trait for @Input or @Output CommandLineFunction fields that are not files, but have a File that can be get/set.
 */
trait FileProvider {
/** Gets/Sets the file. */
var file: File
}

View File

@ -1,67 +0,0 @@
package org.broadinstitute.sting.queue.function
import java.lang.reflect.Field
import org.broadinstitute.sting.queue.util._
import org.broadinstitute.sting.commandline.{Input, Output}
/**
* A function with @Inputs and @Outputs tagging fields that can be set by the user in a QScript
*/
trait InputOutputFunction extends QFunction with Cloneable {
// Reflective get/set of any field on this function.
def getFieldValue(field: Field) = ReflectionUtils.getValue(this, field)
def setFieldValue(field: Field, value: Any) = ReflectionUtils.setValue(this, field, value)
// All user-settable fields: inputs followed by outputs.
def functionFields: List[Field] = inputFields ::: outputFields
def inputFields = ReflectionUtils.filterFields(fields, classOf[Input])
def outputFields = ReflectionUtils.filterFields(fields, classOf[Output])
private lazy val fields = ReflectionUtils.getAllFields(this.getClass)
// TODO: Need to handle argument collections where field is not on THIS
def inputs = CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, inputFields)).toSet
def outputs = CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, outputFields)).toSet
/**
 * Sets a field value using the name of the field.
 * Field must be annotated with @Input or @Output
 * @return true if the value was found and set
 */
protected def addOrUpdateWithStringValue(name: String, value: String) = {
fields.find(_.getName == name) match {
case Some(field) =>
val isInput = ReflectionUtils.hasAnnotation(field, classOf[Input])
val isOutput = ReflectionUtils.hasAnnotation(field, classOf[Output])
if (isInput || isOutput) {
ReflectionUtils.addOrUpdateWithStringValue(this, field, value)
}
// NOTE(review): returns true whenever the field name exists, even if it was
// not annotated and therefore not updated — confirm that is intended.
true
// TODO: Need to handle argument collections where field is not on THIS
case None => false
}
}
// Shallow copy typed as this.type so callers keep the concrete function type.
def cloneFunction() = clone.asInstanceOf[this.type]
// explicitly overriden so that trait function cloneFunction can use this.clone
override protected def clone = super.clone
/**
 * As the function is frozen, changes all fields to their canonical forms.
 */
override def freeze = {
for (field <- this.functionFields)
mapField(field, canon)
super.freeze
}
// Applies f through any collections in the field value and stores the result back.
def mapField(field: Field, f: Any => Any): Any = {
var fieldValue = this.getFieldValue(field)
fieldValue = CollectionUtils.updated(fieldValue, f).asInstanceOf[AnyRef]
this.setFieldValue(field, fieldValue)
fieldValue
}
/**
 * Set value to a uniform value across functions.
 * The biggest example is file paths relative to the command directory in DispatchFunction
 */
protected def canon(value: Any): Any = value
}

View File

@ -1,8 +0,0 @@
package org.broadinstitute.sting.queue.function
import java.io.File
trait IntervalFunction extends InputOutputFunction {
// Abstract members: implementors supply the reference file and the intervals file.
var referenceFile: File
var intervals: File
}

View File

@ -0,0 +1,15 @@
package org.broadinstitute.sting.queue.function
import org.broadinstitute.sting.commandline.Argument
import java.io.File
/**
 * Defines a command line function that runs from a jar file.
 */
trait JarCommandLineFunction extends CommandLineFunction {
@Argument(doc="jar")
var jarFile: File = _
// Builds: java [-Xmx<limit>g] -Djava.io.tmpdir=<jobTempDir> -jar <jarFile>
def commandLine = "java%s -Djava.io.tmpdir=%s -jar %s"
.format(optional(" -Xmx", memoryLimit, "g"), jobTempDir, jarFile)
}

View File

@ -1,9 +1,15 @@
package org.broadinstitute.sting.queue.function
import java.io.File
/**
* Utility class to map a set of inputs to set of outputs.
* The QGraph uses this function internally to map between user defined functions.
*/
class MappingFunction(val inputs: Set[Any], val outputs: Set[Any]) extends QFunction {
override def toString = "<map>" // For debugging
class MappingFunction(val inputs: Set[File], val outputs: Set[File]) extends QFunction {
/**
 * For debugging purposes returns <map>.
 * @return <map>
 */
override def toString = "<map>"
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.queue.function
import java.io.File
/**
* The base interface for all functions in Queue.
* Inputs and outputs are specified as Sets of values.
@ -16,12 +18,15 @@ trait QFunction {
/**
* Set of inputs for this function.
*/
def inputs: Set[Any]
def inputs: Set[File]
/**
* Set of outputs for this function.
*/
def outputs: Set[Any]
def outputs: Set[File]
/**
* The function description in .dot files
*/
def dotString = ""
}

View File

@ -1,38 +0,0 @@
package org.broadinstitute.sting.queue.function.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.IntervalFunction
import org.broadinstitute.sting.queue.function.scattergather.{Scatter, ScatterGatherableFunction, IntervalScatterFunction}
import org.broadinstitute.sting.commandline.{ClassType, Input}
import org.apache.log4j.Level
/**
 * Base trait for functions that run a GATK walker from the GATK jar,
 * assembling the shared java / -T / -R / -I / -l / -D / -L arguments.
 */
trait GatkFunction extends ScatterGatherableFunction with IntervalFunction {
@Input(doc="Temporary directory to write any files", required=false)
var javaTmpDir: String = _
@Input(doc="GATK jar")
var gatkJar: String = _
@Input(doc="Reference fasta")
var referenceFile: File = _
@Input(doc="Bam files", required=false)
@ClassType(classOf[File])
var bamFiles: List[File] = Nil
@Input(doc="Intervals", required=false)
@Scatter(classOf[IntervalScatterFunction])
var intervals: File = _
@Input(doc="DBSNP", required=false)
var dbsnp: File = _
@Input(doc="Logging level", required=false)
var gatkLoggingLevel: String = _
// NOTE(review): the format string ends with a trailing space, presumably so
// subclasses can append walker-specific arguments — confirm before changing.
protected def gatkCommandLine(walker: String) =
"java%s%s -jar %s -T %s -R %s%s%s%s%s "
.format(optional(" -Xmx", memoryLimit, "g"), optional(" -Djava.io.tmpdir=", javaTmpDir),
gatkJar, walker, referenceFile, repeat(" -I ", bamFiles), optional(" -l ", gatkLoggingLevel),
optional(" -D ", dbsnp), optional(" -L ", intervals))
}

View File

@ -1,17 +0,0 @@
package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.commandline.Input
/** Gathers scattered BAM pieces back into the original output using Picard MergeSamFiles. */
class BamGatherFunction extends GatherFunction {
type GatherType = File
@Input(doc="Picard MergeSamFiles.jar. At the Broad this can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the broad see http://picard.sourceforge.net/")
var picardMergeSamFilesJar: String = _
@Input(doc="Compression level 1-9", required=false)
var picardMergeCompressionLevel: Option[Int] = None
// Merges all gatherParts into originalOutput, coordinate sorted, with silent validation.
def commandLine = "java -jar %s%s%s%s".format(picardMergeSamFilesJar,
optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
}

View File

@ -1,15 +1,24 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline.Input
import java.io.File
import org.broadinstitute.sting.commandline.{Argument, Input}
/**
* Removes the temporary directories for scatter / gather.
* The script can be changed by setting rmdirScript.
* By default uses rm -rf.
* The format of the call is <mkdirScript> <dir_1> [.. <dir_n>]
*/
class CleanupTempDirsFunction extends CommandLineFunction {
@Input(doc="Original outputs of the gather functions")
var originalOutputs: Set[Any] = Set.empty[Any]
var originalOutputs: Set[File] = Set.empty[File]
@Input(doc="Temporary directories to be deleted")
var tempDirectories: List[File] = Nil
def commandLine = "rm -rf%s".format(repeat(" '", tempDirectories, "'"))
@Argument(doc="rmdir script or command")
var rmdirScript = "rm -rf"
def commandLine = "%s%s".format(rmdirScript, repeat(" '", tempDirectories, "'"))
}

View File

@ -1,21 +0,0 @@
package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.commandline.Input
import org.broadinstitute.sting.queue.function.IntervalFunction
/** Scatters the original interval input by contig via splitIntervalsByContig.py. */
class ContigScatterFunction extends ScatterFunction {
type ScatterType = File
@Input(doc="Reference file to scatter")
var referenceFile: File = _
// Copies the reference file from the original (IntervalFunction) before delegating.
override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {
val command = originalFunction.asInstanceOf[IntervalFunction]
referenceFile = command.referenceFile
super.setOriginalFunction(originalFunction)
}
// TODO: Use the reference file for "all"
def commandLine = "splitIntervalsByContig.py %s%s".format(originalInput, repeat(" ", scatterParts))
}

View File

@ -2,25 +2,28 @@ package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline.{Output, Input}
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
/**
* Creates the temporary directories for scatter / gather.
* The script can be changed by setting mkdirScript.
* By default uses mkdir -pv
* The format of the call is <rmdirScript> <dir_1> [.. <dir_n>]
*/
class CreateTempDirsFunction extends CommandLineFunction {
@Input(doc="Original inputs to the scattered function")
var originalInputs: Set[Any] = Set.empty[Any]
var originalInputs: Set[File] = Set.empty[File]
@Output(doc="Temporary directories to create")
var tempDirectories: List[File] = Nil
@Input(doc="Sleep seconds", required=false)
var mkdirSleepSeconds: Option[Int] = None
@Argument(doc="mkdir script or command")
var mkdirScript = "mkdir -pv"
// TODO: After port of LSF submitter use -cwd <dir> instead of trying to run from the directory
// For now, create the directory so that BroadCore can run bsub from it -kshakir July 27, 2010 on chartl's computer
def commandLine = "%s%s".format(mkdirScript, repeat(" '", tempDirectories, "'"))
override def freeze = {
super.freeze
tempDirectories.foreach(_.mkdirs)
}
def commandLine = "mkdir -pv%s%s".format(repeat(" '", tempDirectories, "'"), optional(" && sleep ", mkdirSleepSeconds))
/**
* This function is creating the directories, so returns just this command directory.
*/
override def jobDirectories = Set(commandDirectory)
}

View File

@ -1,17 +0,0 @@
package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.commandline.Input
/** Gathers scattered BAM pieces with Picard FixMateInformation, fixing mate info during the merge. */
class FixMatesGatherFunction extends GatherFunction {
type GatherType = File
@Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
var picardFixMatesJar: String = _
@Input(doc="Compression level 1-9", required=false)
var picardMergeCompressionLevel: Option[Int] = None
// NOTE(review): the tmpdir /broad/shptmp/queue is hard-coded and site-specific — confirm.
def commandLine = "java -Djava.io.tmpdir=/broad/shptmp/queue -jar %s%s%s%s".format(picardFixMatesJar,
optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
}

View File

@ -1,20 +1,31 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.{CommandLineFunction}
import org.broadinstitute.sting.commandline.{Input, Output}
import java.io.File
import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output}
/**
* Base class for Gather command line functions.
* NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits.
*/
abstract class GatherFunction extends CommandLineFunction {
type GatherType
trait GatherFunction extends CommandLineFunction {
@Input(doc="Parts to gather back into the original output")
var gatherParts: List[GatherType] = Nil
var gatherParts: List[File] = Nil
@Output(doc="The original output of the scattered function")
var originalOutput: GatherType = _
var originalOutput: File = _
def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {}
/**
* Sets the original function used to create this scatter function.
* @param originalFunction The ScatterGatherableFunction.
* @param gatherField The field being gathered.
*/
def setOriginalFunction(originalFunction: ScatterGatherableFunction, gatherField: ArgumentSource) = {}
/**
* Sets the clone function creating one of the inputs for this gather function.
* @param cloneFunction The clone of the ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param gatherField The field to be gathered.
*/
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, gatherField: ArgumentSource) = {}
}

View File

@ -1,21 +0,0 @@
package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.commandline.Input
import org.broadinstitute.sting.queue.function.IntervalFunction
/** Scatters the original interval input into pieces via splitIntervals.sh. */
class IntervalScatterFunction extends ScatterFunction {
type ScatterType = File
@Input(doc="Reference file to scatter")
var referenceFile: File = _
// Copies the reference file from the original (IntervalFunction) before delegating.
override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {
val command = originalFunction.asInstanceOf[IntervalFunction]
referenceFile = command.referenceFile
super.setOriginalFunction(originalFunction)
}
// TODO: Use the reference file for "all"
def commandLine = "splitIntervals.sh %s%s".format(originalInput, repeat(" ", scatterParts))
}

View File

@ -2,23 +2,33 @@ package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
import java.io.File
import org.broadinstitute.sting.commandline.{Input, Output}
import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output}
/**
* Base class for Scatter command line functions.
* NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits.
*/
abstract class ScatterFunction extends CommandLineFunction {
type ScatterType
trait ScatterFunction extends CommandLineFunction {
@Input(doc="Original input to scatter")
var originalInput: ScatterType = _
var originalInput: File = _
@Output(doc="Scattered parts of the original input, one per temp directory")
var scatterParts: List[File] = Nil
@Input(doc="Temporary directories for each scatter part")
var tempDirectories: List[File] = Nil
@Output(doc="Scattered parts of the original input, one per temp directory")
var scatterParts: List[ScatterType] = Nil
/**
* Sets the original function used to create this scatter function.
* @param originalFunction The ScatterGatherableFunction.
* @param scatterField The field being scattered.
*/
def setOriginalFunction(originalFunction: ScatterGatherableFunction, scatterField: ArgumentSource) = {}
def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {}
/**
* Sets the clone function using one of the outputs of this scatter function.
* @param cloneFunction The clone of the ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
* @param scatterField The field being scattered.
*/
def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, scatterField: ArgumentSource) = {}
}

View File

@ -1,141 +1,367 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
import java.lang.reflect.Field
import java.io.File
import org.broadinstitute.sting.queue.util._
import org.broadinstitute.sting.commandline.Input
import org.broadinstitute.sting.commandline.ArgumentSource
import org.broadinstitute.sting.queue.function.CommandLineFunction
import com.rits.cloning.Cloner
/**
* A function that can be run faster by splitting it up into pieces and then joining together the results.
*/
trait ScatterGatherableFunction extends CommandLineFunction {
@Input(doc="Number of parts to scatter the function into")
/** Number of parts to scatter the function into" */
var scatterCount: Int = 1
def scatterField = this.inputFields.find(field => ReflectionUtils.hasAnnotation(field, classOf[Scatter])).get
/** scatter gather directory */
var scatterGatherDirectory: File = _
def scatterGatherable = {
if (scatterCount < 2)
false
else if (!hasFieldValue(scatterField))
false
else
true
}
/** cleanup temporary directories */
var cleanupTempDirectories = false
def generateFunctions() = ScatterGatherableFunction.generateFunctions(this)
}
/** Class to use for creating temporary directories. Defaults to CreateTempDirsFunction. */
var createTempDirsClass: Class[_ <: CreateTempDirsFunction] = _
object ScatterGatherableFunction {
private def generateFunctions(originalFunction: ScatterGatherableFunction) = {
/** Class to use for scattering. Defaults to the annotation used in the @Scatter tag. */
var scatterClass: Class[_ <: ScatterFunction] = _
/**
* Function that returns the class to use for gathering a directory. If it returns null then @Gather annotation will be used.
* @param gatherField Field that is to be gathered.
* @return The class of the GatherFunction to be used or null.
*/
var gatherClass: PartialFunction[ArgumentSource, Class[_ <: GatherFunction]] = _
/** Class to use for removing temporary directories. Defaults to CleanupTempDirsFunction. */
var cleanupTempDirsClass: Class[_ <: CleanupTempDirsFunction] = _
/**
* Allows external modification of the CreateTempDirsFunction that will create the temporary directories.
* @param initializeFunction The function that will create the temporary directories.
* @param inputFields The input fields that the original function was dependent on.
*/
var setupInitializeFunction: PartialFunction[(CreateTempDirsFunction, List[ArgumentSource]), Unit] = _
/**
* Allows external modification of the ScatterFunction that will create the scatter pieces in the temporary directories.
* @param scatterFunction The function that will create the scatter pieces in the temporary directories.
* @param scatterField The input field being scattered.
*/
var setupScatterFunction: PartialFunction[(ScatterFunction, ArgumentSource), Unit] = _
/**
* Allows external modification of the GatherFunction that will collect the gather pieces in the temporary directories.
* @param gatherFunction The function that will merge the gather pieces from the temporary directories.
* @param gatherField The output field being gathered.
*/
var setupGatherFunction: PartialFunction[(GatherFunction, ArgumentSource), Unit] = _
/**
* Allows external modification of the cloned function.
* @param cloneFunction The clone of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
var setupCloneFunction: PartialFunction[(ScatterGatherableFunction, Int), Unit] = _
/**
* Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories.
* @param cleanupFunction The function that will remove the temporary directories.
* @param gatherFunctions The functions that will gather up the original output fields.
* @param outputFields The output fields that the original function was dependent on.
*/
var setupCleanupFunction: PartialFunction[(CleanupTempDirsFunction, Map[ArgumentSource, GatherFunction], List[ArgumentSource]), Unit] = _
/**
* Returns true if the function is ready to be scatter / gathered.
* The base implementation checks if the scatter count is greater than one,
* and that the scatter field has a value.
* @return true if the function is ready to be scatter / gathered.
*/
def scatterGatherable = this.scatterCount > 1 && hasFieldValue(this.scatterField)
/**
* Returns a list of scatter / gather and clones of this function
* that can be run in parallel to produce the same output as this
* command line function.
* @return List[CommandLineFunction] to run instead of this function.
*/
def generateFunctions() = {
var functions = List.empty[CommandLineFunction]
var tempDirectories = List.empty[File]
// Create a function that will remove any temporary items
var cleanupFunction = new CleanupTempDirsFunction
cleanupFunction.properties = originalFunction.properties
cleanupFunction.jobNamePrefix = originalFunction.jobNamePrefix
cleanupFunction.commandDirectory = originalFunction.commandDirectory
// Find the field with @Scatter and its value
var scatterField = originalFunction.scatterField
val originalValue = originalFunction.getFieldValue(scatterField)
// Only depend on input fields that have a value
val inputFieldsWithValues = this.inputFields.filter(hasFieldValue(_))
// Only gather up fields that will have a value
val outputFieldsWithValues = this.outputFields.filter(hasFieldValue(_))
// Create the scatter function based on @Scatter
val scatterFunction = getScatterFunction(scatterField)
scatterFunction.setOriginalFunction(originalFunction)
scatterFunction.properties = originalFunction.properties
scatterFunction.jobNamePrefix = originalFunction.jobNamePrefix
scatterFunction.commandDirectory = originalFunction.temp("scatter-" + scatterField.getName)
scatterFunction.originalInput = originalValue.asInstanceOf[scatterFunction.ScatterType]
val scatterFunction = this.newScatterFunction(this.scatterField)
initScatterFunction(scatterFunction, this.scatterField)
tempDirectories :+= scatterFunction.commandDirectory
functions :+= scatterFunction
// Create the gather functions for each output field
var gatherFunctions = Map.empty[Field, GatherFunction]
for (outputField <- originalFunction.outputFieldsWithValues) {
// Create the gather function based on @Gather
val gatherFunction = getGatherFunction(outputField)
gatherFunction.setOriginalFunction(originalFunction)
gatherFunction.properties = originalFunction.properties
gatherFunction.jobNamePrefix = originalFunction.jobNamePrefix
gatherFunction.commandDirectory = originalFunction.temp("gather-" + outputField.getName)
val gatheredValue = originalFunction.getFieldValue(outputField).asInstanceOf[gatherFunction.GatherType]
gatherFunction.originalOutput = gatheredValue
var gatherFunctions = Map.empty[ArgumentSource, GatherFunction]
for (gatherField <- outputFieldsWithValues) {
val gatherFunction = this.newGatherFunction(gatherField)
initGatherFunction(gatherFunction, gatherField)
tempDirectories :+= gatherFunction.commandDirectory
cleanupFunction.originalOutputs += gatheredValue
functions :+= gatherFunction
gatherFunctions += outputField -> gatherFunction
gatherFunctions += gatherField -> gatherFunction
}
// Create the clone functions for running the parallel jobs
var cloneFunctions = List.empty[CommandLineFunction]
for (i <- 1 to originalFunction.scatterCount) {
val cloneFunction = newFunctionClone(originalFunction)
for (i <- 1 to this.scatterCount) {
val cloneFunction = this.newCloneFunction()
initCloneFunction(cloneFunction, i)
cloneFunctions :+= cloneFunction
tempDirectories :+= cloneFunction.commandDirectory
val tempDir = originalFunction.temp("temp-"+i)
cloneFunction.commandDirectory = tempDir
tempDirectories :+= tempDir
// Reset the input of the clone to the the temp dir and add it as an output of the scatter
var scatterPart = CollectionUtils.updated(originalValue, resetToTempDir(tempDir))
scatterFunction.scatterParts :+= scatterPart.asInstanceOf[scatterFunction.ScatterType]
cloneFunction.setFieldValue(scatterField, scatterPart)
// For each output field, change value to the temp dir and feed it into the gatherer
for (outputField <- originalFunction.outputFields) {
val gatherFunction = gatherFunctions(outputField)
val gatherPart = cloneFunction.mapField(outputField, resetToTempDir(tempDir))
gatherFunction.gatherParts :+= gatherPart.asInstanceOf[gatherFunction.GatherType]
}
bindCloneFunctionScatter(scatterFunction, this.scatterField, cloneFunction, i)
// For each output field, change value to the scatterGatherTempDir directory and feed it into the gatherer
for (gatherField <- outputFieldsWithValues)
bindCloneFunctionGather(gatherFunctions(gatherField), gatherField, cloneFunction, i)
}
functions = cloneFunctions ::: functions
functions ++= cloneFunctions
// Create a function to create all of the temp directories.
// Create a function to create all of the scatterGatherTempDir directories.
// All of its inputs are the inputs of the original function.
val initializeFunction = new CreateTempDirsFunction
initializeFunction.properties = originalFunction.properties
initializeFunction.jobNamePrefix = originalFunction.jobNamePrefix
initializeFunction.commandDirectory = originalFunction.commandDirectory
val initializeFunction = this.newInitializeFunction()
initInitializeFunction(initializeFunction, inputFieldsWithValues)
for (inputField <- originalFunction.inputFieldsWithValues)
initializeFunction.originalInputs += originalFunction.getFieldValue(inputField)
// Create a function that will remove any temporary items
// All of its inputs are the outputs of the original function.
var cleanupFunction = newCleanupFunction()
initCleanupFunction(cleanupFunction, gatherFunctions, outputFieldsWithValues)
// Set the temporary directories, for the initialize function as outputs for scatter and cleanup as inputs.
initializeFunction.tempDirectories = tempDirectories
scatterFunction.tempDirectories = tempDirectories
cleanupFunction.tempDirectories = tempDirectories
functions +:= initializeFunction
functions :+= cleanupFunction
if (this.cleanupTempDirectories)
functions :+= cleanupFunction
// Return all the various functions we created
functions
}
private def resetToTempDir(tempDir: File): Any => Any = {
(any: Any) => {
any match {
case file: File => IOUtils.reset(tempDir, file)
case x => x
}
}
/**
* Sets the scatter gather directory to the command directory if it is not already set.
*/
override def freezeFieldValues = {
super.freezeFieldValues
if (this.scatterGatherDirectory == null)
this.scatterGatherDirectory = this.commandDirectory
}
private def getScatterFunction(inputField: Field) =
ReflectionUtils.getAnnotation(inputField, classOf[Scatter]).value.newInstance.asInstanceOf[ScatterFunction]
/**
* Retrieves the scatter field from the first field that has the annotation @Scatter.
*/
protected lazy val scatterField =
this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get
private def getGatherFunction(outputField: Field) =
ReflectionUtils.getAnnotation(outputField, classOf[Gather]).value.newInstance.asInstanceOf[GatherFunction]
/**
* Creates a new initialize CreateTempDirsFunction that will create the temporary directories.
* @return A CreateTempDirsFunction that will create the temporary directories.
*/
protected def newInitializeFunction(): CreateTempDirsFunction = {
if (createTempDirsClass != null)
this.createTempDirsClass.newInstance
else
new CreateTempDirsFunction
}
private def newFunctionClone(originalFunction: ScatterGatherableFunction) = {
val cloneFunction = originalFunction.cloneFunction.asInstanceOf[ScatterGatherableFunction]
/**
* Initializes the CreateTempDirsFunction that will create the temporary directories.
* The initializeFunction jobNamePrefix is set so that the CreateTempDirsFunction runs with the same prefix as this ScatterGatherableFunction.
* The initializeFunction commandDirectory is set so that the function runs in the directory as this ScatterGatherableFunction.
* The initializeFunction is modified to become dependent on the input files for this ScatterGatherableFunction.
* Calls setupInitializeFunction with initializeFunction.
* @param initializeFunction The function that will create the temporary directories.
* @param inputFields The input fields that the original function was dependent on.
*/
protected def initInitializeFunction(initializeFunction: CreateTempDirsFunction, inputFields: List[ArgumentSource]) = {
initializeFunction.jobNamePrefix = this.jobNamePrefix
initializeFunction.commandDirectory = this.commandDirectory
for (inputField <- inputFields)
initializeFunction.originalInputs ++= this.getFieldFiles(inputField)
if (this.setupInitializeFunction != null)
if (this.setupInitializeFunction.isDefinedAt(initializeFunction, inputFields))
this.setupInitializeFunction(initializeFunction, inputFields)
}
/**
* Creates a new ScatterFunction for the scatterField.
* @param scatterField Field that defined @Scatter.
* @return A ScatterFunction instantiated from @Scatter or scatterClass if scatterClass was set on this ScatterGatherableFunction.
*/
protected def newScatterFunction(scatterField: ArgumentSource): ScatterFunction = {
var scatterClass = this.scatterClass
if (scatterClass == null)
scatterClass = ReflectionUtils.getAnnotation(scatterField.field, classOf[Scatter])
.value.asSubclass(classOf[ScatterFunction])
scatterClass.newInstance.asInstanceOf[ScatterFunction]
}
/**
* Initializes the ScatterFunction created by newScatterFunction() that will create the scatter pieces in the temporary directories.
* The scatterFunction jobNamePrefix is set so that the ScatterFunction runs with the same prefix as this ScatterGatherableFunction.
* The scatterFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory.
* The scatterFunction has its originalInput set with the file to be scattered into scatterCount pieces.
* Calls scatterFunction.setOriginalFunction with this ScatterGatherableFunction.
* Calls setupScatterFunction with scatterFunction.
* @param scatterFunction The function that will create the scatter pieces in the temporary directories.
* @param scatterField The input field being scattered.
*/
protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = {
scatterFunction.jobNamePrefix = this.jobNamePrefix
scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName)
scatterFunction.originalInput = this.getFieldFile(scatterField)
scatterFunction.setOriginalFunction(this, scatterField)
if (this.setupScatterFunction != null)
if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField))
this.setupScatterFunction(scatterFunction, scatterField)
}
/**
* Creates a new GatherFunction for the gatherField.
* @param gatherField Field that defined @Gather.
* @return A GatherFunction instantiated from @Gather.
*/
protected def newGatherFunction(gatherField: ArgumentSource) : GatherFunction = {
var gatherClass: Class[_ <: GatherFunction] = null
if (this.gatherClass != null)
if (this.gatherClass.isDefinedAt(gatherField))
gatherClass = this.gatherClass(gatherField)
if (gatherClass == null)
gatherClass = ReflectionUtils.getAnnotation(gatherField.field, classOf[Gather])
.value.asSubclass(classOf[GatherFunction])
gatherClass.newInstance.asInstanceOf[GatherFunction]
}
/**
* Initializes the GatherFunction created by newGatherFunction() that will collect the gather pieces in the temporary directories.
* The gatherFunction jobNamePrefix is set so that the GatherFunction runs with the same prefix as this ScatterGatherableFunction.
* The gatherFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory.
* The gatherFunction has its originalOutput set with the file to be gathered from the scatterCount pieces.
* Calls the gatherFunction.setOriginalFunction with this ScatterGatherableFunction.
* Calls setupGatherFunction with gatherFunction.
* @param gatherFunction The function that will merge the gather pieces from the temporary directories.
* @param gatherField The output field being gathered.
*/
protected def initGatherFunction(gatherFunction: GatherFunction, gatherField: ArgumentSource) = {
gatherFunction.jobNamePrefix = this.jobNamePrefix
gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName)
gatherFunction.originalOutput = this.getFieldFile(gatherField)
gatherFunction.setOriginalFunction(this, gatherField)
if (this.setupGatherFunction != null)
if (this.setupGatherFunction.isDefinedAt(gatherFunction, gatherField))
this.setupGatherFunction(gatherFunction, gatherField)
}
/**
* Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter.
* @return A clone of this ScatterGatherableFunction
*/
protected def newCloneFunction(): ScatterGatherableFunction = {
val cloneFunction = ScatterGatherableFunction.cloner.deepClone(this)
// Make sure clone doesn't get scattered
cloneFunction.scatterCount = 1
cloneFunction
}
/**
* Initializes the cloned function created by newCloneFunction() by setting its commandDirectory to a temporary directory under scatterDirectory.
* Calls setupCloneFunction with cloneFunction.
* @param cloneFunction The clone of this ScatterGatherableFunction
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
protected def initCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int) = {
cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index)
if (this.setupCloneFunction != null)
if (this.setupCloneFunction.isDefinedAt(cloneFunction, index))
this.setupCloneFunction(cloneFunction, index)
}
/**
* Joins a piece of the ScatterFunction output to the cloned function's input.
* The input of the clone is changed to be in the output directory of the clone.
* The scatter function piece is added as an output of the scatterFunction.
* The clone function's original input is changed to use the piece from the output directory.
* Finally the scatterFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction.
* @param scatterFunction Function that will create the pieces including the piece that will go to cloneFunction.
* @param scatterField The field to be scattered.
* @param cloneFunction Clone of this ScatterGatherableFunction.
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
*/
protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
// Reset the input of the clone to the the scatterGatherTempDir dir and add it as an output of the scatter
val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput)
scatterFunction.scatterParts :+= scatterPart
cloneFunction.setFieldValue(scatterField, scatterPart)
scatterFunction.setCloneFunction(cloneFunction, index, scatterField)
}
/**
* Joins the cloned function's output as a piece of the GatherFunction's input.
* Finally the gatherFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction.
* @param cloneFunction Clone of this ScatterGatherableFunction.
* @param gatherFunction Function that will merge the pieces including the piece produced by cloneFunction.
* @param gatherField The field to be gathered.
*/
protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory)
gatherFunction.gatherParts :+= gatherPart
gatherFunction.setCloneFunction(cloneFunction, index, gatherField)
}
/**
* Creates a new function that will remove the temporary directories.
* @return A CleanupTempDirs function that will remove the temporary directories.
*/
protected def newCleanupFunction(): CleanupTempDirsFunction = {
if (cleanupTempDirsClass != null)
this.cleanupTempDirsClass.newInstance
else
new CleanupTempDirsFunction
}
/**
* Initializes the CleanupTempDirsFunction created by newCleanupFunction() that will remove the temporary directories.
* The cleanupFunction jobNamePrefix is set so that the CleanupTempDirsFunction runs with the same prefix as this ScatterGatherableFunction.
* The cleanupFunction commandDirectory is set so that the function runs in the directory as this ScatterGatherableFunction.
* The initializeFunction is modified to become dependent on the output files for this ScatterGatherableFunction.
* Calls setupCleanupFunction with cleanupFunction.
* @param cleanupFunction The function that will remove the temporary directories.
* @param gatherFunctions The functions that will gather up the original output fields.
* @param outputFields The output fields that the original function was dependent on.
*/
protected def initCleanupFunction(cleanupFunction: CleanupTempDirsFunction, gatherFunctions: Map[ArgumentSource, GatherFunction], outputFields: List[ArgumentSource]) = {
cleanupFunction.jobNamePrefix = this.jobNamePrefix
cleanupFunction.commandDirectory = this.commandDirectory
for (gatherField <- outputFields)
cleanupFunction.originalOutputs += gatherFunctions(gatherField).originalOutput
if (this.setupCleanupFunction != null)
if (this.setupCleanupFunction.isDefinedAt(cleanupFunction, gatherFunctions, outputFields))
this.setupCleanupFunction(cleanupFunction, gatherFunctions, outputFields)
}
/**
* Returns a temporary directory under this scatter gather directory.
* @param subDir Sub directory under the scatter gather directory.
* @return temporary directory under this scatter gather directory.
*/
private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-" + subDir)
}
/**
* A function that can be run faster by splitting it up into pieces and then joining together the results.
*/
object ScatterGatherableFunction {
/** Used to deep clone a ScatterGatherableFunction. */
private lazy val cloner = new Cloner
}

View File

@ -1,10 +1,16 @@
package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.commandline.Argument
/**
* Merges a text file.
* The script can be changed by setting mergeTextScript.
* By default uses mergeText.sh in Sting/shell.
* The format of the call is <mergeTextScript> <file_output> <file_1> [.. <file_n>]
*/
class SimpleTextGatherFunction extends GatherFunction {
type GatherType = File
@Argument(doc="merge text script")
var mergeTextScript = "mergeText.sh"
// TODO: Write a text merging utility that takes into account headers.
def commandLine = "mergeText.sh %s%s".format(originalOutput, repeat(" ", gatherParts))
def commandLine = "%s %s%s".format(mergeTextScript, originalOutput, repeat(" ", gatherParts))
}

View File

@ -4,14 +4,32 @@ import collection.JavaConversions._
import org.reflections.util.ManifestAwareClasspathHelper
import java.io.File
import javax.print.URIException
import java.net.{URL, URLClassLoader}
/**
* Builds the correct class path by examining the manifests
*/
object ClasspathUtils {
/**
* Returns a list of files that build up the classpath, taking into account jar file manifests.
* @return List[File] that build up the current classpath.
*/
def manifestAwareClassPath = {
var urls = ManifestAwareClasspathHelper.getUrlsForManifestCurrentClasspath
var files = urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)})
files.mkString(File.pathSeparator)
urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)})
}
/**
* Adds the directory to the system class loader classpath using reflection.
* HACK: Uses reflection to modify the class path, and assumes loader is a URLClassLoader
* @param path Directory to add to the system class loader classpath.
*/
def addClasspath(path: File): Unit = {
val url = path.toURI.toURL
val method = classOf[URLClassLoader].getDeclaredMethod("addURL", classOf[URL]);
if (!method.isAccessible)
method.setAccessible(true);
method.invoke(ClassLoader.getSystemClassLoader(), url);
}
}

View File

@ -1,18 +1,16 @@
package org.broadinstitute.sting.queue.util
/**
* Utilities that try to deeply apply operations to collections
* Utilities that try to deeply apply operations to collections, specifically Traversable and Option.
*/
object CollectionUtils {
def test(value: Any, f: Any => Boolean): Boolean = {
var result = f(value)
foreach(value, (item, collection) => {
result |= f(item)
})
result
}
/**
* Loops though a collection running the function f on each value.
* @param value The value to run f on, or a collection of values for which f should be run on.
* @param f The function to run on value, or to run on the values within the collection.
* @return The updated value.
*/
def updated(value: Any, f: Any => Any): Any = {
value match {
case traversable: Traversable[_] => traversable.map(updated(_, f))
@ -21,6 +19,11 @@ object CollectionUtils {
}
}
/**
* Utility for recursively processing collections.
* @param value The initial collection to be processed
* @param f a function that will be called for each (item, collection) in the initial collection
*/
def foreach(value: Any, f: (Any, Any) => Unit): Unit = {
value match {
case traversable: Traversable[_] =>
@ -37,11 +40,24 @@ object CollectionUtils {
}
}
// Because scala allows but throws NPE when trying to hash a collection with a null in it.
// http://thread.gmane.org/gmane.comp.lang.scala.internals/3267
// https://lampsvn.epfl.ch/trac/scala/ticket/2935
def removeNullOrEmpty[T](value: T): T = filterNotNullOrNotEmpty(value)
/**
* Utility for recursively processing collections.
* @param value The initial collection to be processed
* @param f a function that will be called for each (item, collection) in the initial collection
*/
def foreach(value: Any, f: (Any) => Unit): Unit = {
value match {
case traversable: Traversable[_] => traversable.foreach(f(_))
case option: Option[_] => option.foreach(f(_))
case item => f(item)
}
}
/**
* Removes empty values from collections.
* @param value The collection to test.
* @return The value if it is not a collection, otherwise the collection with nulls and empties removed.
*/
private def filterNotNullOrNotEmpty[T](value: T): T = {
val newValue = value match {
case traversable: Traversable[_] => traversable.map(filterNotNullOrNotEmpty(_)).filter(isNotNullOrNotEmpty(_)).asInstanceOf[T]
@ -51,7 +67,20 @@ object CollectionUtils {
newValue
}
private def isNotNullOrNotEmpty(value: Any): Boolean = {
/**
* Returns true if the value is null or an empty collection.
* @param value Value to test for null, or a collection to test if it is empty.
* @return true if the value is null or the collection is empty, otherwise false.
*/
def isNullOrEmpty(value: Any): Boolean = !isNotNullOrNotEmpty(value)
/**
* Returns false if the value is null or an empty collection.
* @param value Value to test for null, or a collection to test if it is empty.
* @return false if the value is null, or false if the collection is empty, otherwise true.
*/
def isNotNullOrNotEmpty(value: Any): Boolean = {
val result = value match {
case traversable: Traversable[_] => !filterNotNullOrNotEmpty(traversable).isEmpty
case option: Option[_] => !filterNotNullOrNotEmpty(option).isEmpty

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.queue.util
import java.io.File
/**
* Base class for a command line job.
*/
abstract class CommandLineJob {
var command: String = _
var workingDir: File = _
var inputFile: File = _
var outputFile: File = _
var errorFile: File = _
/**
* Runs the command, either immediately or dispatching it to a compute farm.
* If it is dispatched to a compute farm it should not start until jobs it depends on are finished.
*/
def run()
/**
* Returns the content of a command output.
* @param streamOutput The output of the command.
* @return The content of the command, along with a message if it was truncated.
*/
protected def content(streamOutput: ProcessController.StreamOutput) = {
var content = streamOutput.content
if (streamOutput.contentTruncated)
content += "%n%n<truncated>".format()
content
}
/**
* Returns the ProcessController for this thread.
* @return The ProcessController for this thread.
*/
protected def processController = CommandLineJob.threadProcessController.get
/** A five mb limit of characters for display. */
protected val FIVE_MB = 1024 * 512 * 5;
}
/**
* Base class for a command line job.
*/
object CommandLineJob {
/** Thread local process controller container. */
private val threadProcessController = new ThreadLocal[ProcessController] {
override def initialValue = new ProcessController
}
}

View File

@ -2,30 +2,69 @@ package org.broadinstitute.sting.queue.util
import java.io.{IOException, File}
/**
* A collection of utilities for modifying java.io.
*/
object IOUtils {
/** The current directory "." */
val CURRENT_DIR = new File(".")
def sub(parent: File, subPath: String) = {
val file = new File(subPath)
/**
* Returns the sub path rooted at the parent.
* If the sub path is already absolute, returns the sub path.
* If the parent is the current directory, returns the sub path.
* If the sub path is the current directory, returns the parent.
* Else returns new File(parent, subPath)
* @param parent The parent directory
* @param path The sub path to append to the parent, if the path is not absolute.
* @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
*/
def subDir(dir: File, path: String): File =
subDir(dir.getAbsoluteFile, new File(path))
/**
* Returns the sub path rooted at the parent.
* If the sub path is already absolute, returns the sub path.
* If the parent is the current directory, returns the sub path.
* If the sub path is the current directory, returns the parent.
* Else returns new File(parent, subPath)
* @param parent The parent directory
* @param file The sub path to append to the parent, if the path is not absolute.
* @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
*/
def subDir(parent: File, file: File): File = {
if (parent == CURRENT_DIR && file == CURRENT_DIR)
CURRENT_DIR.getCanonicalFile
CURRENT_DIR.getCanonicalFile.getAbsoluteFile
else if (parent == CURRENT_DIR || file.isAbsolute)
file
file.getAbsoluteFile
else if (file == CURRENT_DIR)
parent
parent.getAbsoluteFile
else
new File(parent, subPath)
new File(parent, file.getPath).getAbsoluteFile
}
def temp(prefix: String, suffix: String = "") = {
val tempDir = File.createTempFile(prefix + "-", suffix)
if(!tempDir.delete)
throw new IOException("Could not delete sub file: " + tempDir.getAbsolutePath())
if(!tempDir.mkdir)
throw new IOException("Could not create sub directory: " + tempDir.getAbsolutePath())
tempDir
}
/**
* Resets the parent of the file to the directory.
* @param dir New parent directory.
* @param file Path to the file to be re-rooted.
* @return Absolute path to the new file.
*/
def resetParent(dir: File, file: File) = subDir(dir.getAbsoluteFile, file.getName).getAbsoluteFile
def reset(dir: File, file: File) = sub(dir, file.getName).getAbsoluteFile
def absolute(dir: File, file: File) = sub(dir, file.getPath).getAbsoluteFile
/**
* Creates a scatterGatherTempDir directory with the prefix and optional suffix.
* @param prefix Prefix for the directory name.
* @param suffix Optional suffix for the directory name. Defaults to "".
* @return The created temporary directory.
* @throws IOException if the directory could not be created.
*/
def tempDir(prefix: String, suffix: String = "") = {
val temp = File.createTempFile(prefix + "-", suffix)
if(!temp.delete)
throw new IOException("Could not delete sub file: " + temp.getAbsolutePath())
if(!temp.mkdir)
throw new IOException("Could not create sub directory: " + temp.getAbsolutePath())
temp
}
}

View File

@ -7,25 +7,5 @@ import org.apache.log4j._
*/
trait Logging {
private val className = this.getClass.getName
protected lazy val logger = {
Logging.configureLogging
Logger.getLogger(className)
}
}
object Logging {
private var configured = false
private var level = Level.INFO
def configureLogging = {
if (!configured) {
var root = Logger.getRootLogger
root.addAppender(new ConsoleAppender(new PatternLayout("%-5p %d{HH:mm:ss,SSS} - %m %n")))
root.setLevel(level)
configured = true
}
}
def setDebug = setLevel(Level.DEBUG)
def setTrace = setLevel(Level.TRACE)
private def setLevel(level: Level) = {this.level = level; Logger.getRootLogger.setLevel(level)}
protected lazy val logger = Logger.getLogger(className)
}

View File

@ -0,0 +1,142 @@
package org.broadinstitute.sting.queue.util
import java.util.regex.Pattern
import collection.JavaConversions._
import org.broadinstitute.sting.queue.QException
/**
* An job submitted to LSF. This class is designed to work somewhat like
* java.lang.Process, but has some extensions.
*
* @author A subset of the original BroadCore ported to scala by Khalid Shakir
*/
class LsfJob extends CommandLineJob with Logging {
var name: String = _
var project: String = _
var queue: String = _
var preExecCommand: String = _
var postExecCommand: String = _
var waitForCompletion = false
var extraBsubArgs: List[String] = Nil
var bsubJobId: String = _
/**
* Starts the job. Command must exist. The job will be submitted to LSF.
*/
def run() = {
assert(bsubJobId == null, "LSF job was already started")
assert(command != null, "Command was not set on LSF job")
assert(outputFile != null, "Output file must be set on LSF job")
// capture the output for debugging
val stdinSettings = new ProcessController.InputStreamSettings(null, null)
val stdoutSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false)
val stderrSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false)
// This is really nice for debugging, but spits out way too much stuff otherwise!
// log.info("About to execute LSF command: " + StringUtils.join(argArray, " "));
// Get environment vars and strip out LD_ASSUME_KERNEL
// This is necessary since GAP servers on linux 2.4.x kernel and can be removed when
// its no longer true. Only 'classic' LSF queue has 2.4 kernel-based machines.
// launch the bsub job from the current directory
val processSettings = new ProcessController.ProcessSettings(
bsubCommand, environmentVariables, null, stdinSettings, stdoutSettings, stderrSettings, false)
val bsubOutput = processController.exec(processSettings)
if (bsubOutput.exitValue != 0) {
logger.error("Failed to submit LSF job, got exit code %s. Standard error contained: %n%s"
.format(bsubOutput.exitValue, content(bsubOutput.stderr)))
throw new QException("Failed to submit LSF job, got exit code %s.".format(bsubOutput.exitValue))
}
// get the LSF job ID
val matcher = LsfJob.JOB_ID.matcher(bsubOutput.stdout.content)
matcher.find()
bsubJobId = matcher.group
// set job name to LSF_<lsf job id> if not set already
if (name == null)
name = "lsf_job_" + bsubJobId
}
/**
* Generates the bsub command line for this LsfJob.
* @return command line as a Array[String]
*/
def bsubCommand = {
var args = List.empty[String]
args :+= "bsub"
if (name != null) {
args :+= "-J"
args :+= name
}
if (inputFile != null) {
args :+= "-i"
args :+= inputFile.getAbsolutePath
}
args :+= "-o"
args :+= outputFile.getAbsolutePath
if (errorFile != null) {
args :+= "-e"
args :+= errorFile.getAbsolutePath
}
if (queue != null) {
args :+= "-q"
args :+= queue
}
if (project != null) {
args :+= "-P"
args :+= project
}
if (preExecCommand != null) {
args :+= "-E"
args :+= preExecCommand
}
if (postExecCommand != null) {
args :+= "-Ep"
args :+= postExecCommand
}
if (workingDir != null) {
args :+= "-cwd"
args :+= workingDir.getPath
}
if (waitForCompletion) {
args :+= "-K"
}
args ++= extraBsubArgs
args :+= command
args.toArray
}
/**
* Get the list of environment variables and pass into the exec job. We strip
* out LD_ASSUME_KERNEL because it behaves badly when running bsub jobs across
* different versions of the linux OS.
*
* @return array of environment vars in 'name=value' format.
*/
private def environmentVariables =
System.getenv()
.filterNot{case (name, value) => name.equalsIgnoreCase("LD_ASSUME_KERNEL") || value == null}
.toMap
}
object LsfJob {
/** Used to search the stdout for the job id. */
private val JOB_ID = Pattern.compile("\\d+")
}

View File

@ -0,0 +1,360 @@
package org.broadinstitute.sting.queue.util
import java.io._
import scala.collection.mutable.{HashSet, ListMap}
/**
* Facade to Runtime.exec() and java.lang.Process. Handles
* running a process to completion and returns stdout and stderr
* as strings. Creates separate threads for reading stdout and stderr,
* then reuses those threads for each process; the most efficient use is
* to create one of these and use it repeatedly. Instances are not
* thread-safe, however.
*
* @author originally by Michael Koehrsen ported to scala and enhanced by Khalid Shakir
*/
class ProcessController extends Logging {
// Threads that capture stdout and stderr
private val stdoutCapture = new OutputCapture(ProcessController.STDOUT_KEY)
private val stderrCapture = new OutputCapture(ProcessController.STDERR_KEY)
// Communication channels with output capture threads
/** Holds the stdout and stderr sent to the background capture threads */
private val toCapture = new ListMap[String, ProcessController.CapturedStreamOutput]
/** Holds the results of the capture from the background capture threads.
* May be the content via toCapture or an EmptyStreamOutput if the capture was interrupted. */
private val fromCapture = new ListMap[String, ProcessController.StreamOutput]
// Start the background threads for this controller.
stdoutCapture.start()
stderrCapture.start()
/**
* Executes a command line program with the settings and waits for it to return, processing the output on a background thread.
* @param settings Settings to be run.
* @return The output of the command.
*/
def exec(settings: ProcessController.ProcessSettings): ProcessController.ProcessOutput = {
var builder = new ProcessBuilder(settings.cmdarray:_*)
builder.directory(settings.directory)
if (settings.environment != null) {
val builderEnvironment = builder.environment
builderEnvironment.clear()
settings.environment.foreach{case (name, value) => builderEnvironment.put(name, value)}
}
builder.redirectErrorStream(settings.redirectErrorStream)
var stdout: ProcessController.StreamOutput = null
var stderr: ProcessController.StreamOutput = null
val process = builder.start
ProcessController.running.add(process)
try {
val stdoutSettings = if (settings.stdoutSettings == null) ProcessController.EmptyStreamSettings else settings.stdoutSettings
val stderrSettings = if (settings.stderrSettings == null) ProcessController.EmptyStreamSettings else settings.stderrSettings
toCapture.synchronized {
toCapture.put(ProcessController.STDOUT_KEY, new ProcessController.CapturedStreamOutput(process.getInputStream, stdoutSettings))
toCapture.put(ProcessController.STDERR_KEY, new ProcessController.CapturedStreamOutput(process.getErrorStream, stderrSettings))
toCapture.notifyAll()
}
if (settings.stdinSettings.input != null) {
val writer = new OutputStreamWriter(process.getOutputStream)
writer.write(settings.stdinSettings.input)
writer.flush()
}
if (settings.stdinSettings.inputFile != null) {
val reader = new FileReader(settings.stdinSettings.inputFile)
val writer = new OutputStreamWriter(process.getOutputStream)
val buf = new Array[Char](4096)
var readCount = 0
while ({readCount = reader.read(buf); readCount} >= 0)
writer.write(buf, 0, readCount)
writer.flush()
reader.close()
}
try {
process.getOutputStream.close()
process.waitFor()
} finally {
while (stdout == null || stderr == null) {
fromCapture.synchronized {
fromCapture.remove(ProcessController.STDOUT_KEY) match {
case Some(stream) => stdout = stream
case None => /* ignore */
}
fromCapture.remove(ProcessController.STDERR_KEY) match {
case Some(stream) => stderr = stream
case None => /* ignore */
}
try {
if (stdout == null || stderr == null)
fromCapture.wait()
} catch {
case e: InterruptedException =>
logger.error(e)
}
}
}
}
} finally {
ProcessController.running.remove(process)
}
new ProcessController.ProcessOutput(process.exitValue, stdout, stderr)
}
/** Ensures that the threads used to manipulate the IO for the process are cleaned up properly. */
def close() = {
try {
stdoutCapture.interrupt()
stderrCapture.interrupt()
} catch {
case e =>
logger.error(e)
}
}
/** calls close() */
override def finalize = close()
/**
* Reads in the output of a stream on a background thread to keep the output pipe from backing up and freezing the called process.
* @param key The stdout or stderr key for this output capture.
*/
private class OutputCapture(private val key: String)
extends Thread("OutputCapture-" + key + "-" + Thread.currentThread.getName) {
setDaemon(true)
/** Runs the capture. */
override def run = {
var break = false
while (!break) {
var processStream: ProcessController.StreamOutput = ProcessController.EmptyStreamOutput
try {
// Wait for a new input stream to be passed from this process controller.
var capturedProcessStream: ProcessController.CapturedStreamOutput = null
while (capturedProcessStream == null) {
toCapture.synchronized {
toCapture.remove(key) match {
case Some(stream) => capturedProcessStream = stream
case None => toCapture.wait()
}
}
}
// Read in the input stream
processStream = capturedProcessStream
capturedProcessStream.read
} catch {
case e: InterruptedException => {
logger.info("OutputReader interrupted, exiting")
break = true
}
case e: IOException => {
logger.error("Error reading process output", e)
}
} finally {
// Send the string back to the process controller.
fromCapture.synchronized {
fromCapture.put(key, processStream)
fromCapture.notify()
}
}
}
}
}
}
/**
 * Companion for ProcessController holding the settings/result types and the
 * global registry of running processes.
 *
 * Facade to Runtime.exec() and java.lang.Process.  Handles
 * running a process to completion and returns stdout and stderr
 * as strings.  Creates separate threads for reading stdout and stderr,
 * then reuses those threads for each process; the most efficient use is
 * to create one of these and use it repeatedly.  Instances are not
 * thread-safe, however.
 *
 * @author originally by Michael Koehrsen; ported to scala and enhanced by Khalid Shakir
 */
object ProcessController extends Logging {
  /**
   * Settings that define how to run a process.
   * @param cmdarray Command line to run.
   * @param environment Environment settings to override System.getEnv, or null to use System.getEnv.
   * @param directory The directory to run the command in, or null to run in the current directory.
   * @param stdinSettings Settings for writing to the process stdin.
   * @param stdoutSettings Settings for capturing the process stdout.
   * @param stderrSettings Setting for capturing the process stderr.
   * @param redirectErrorStream true if stderr should be sent to stdout.
   */
  class ProcessSettings(val cmdarray: Array[String], val environment: Map[String, String], val directory: File,
                        val stdinSettings: InputStreamSettings, val stdoutSettings: OutputStreamSettings,
                        val stderrSettings: OutputStreamSettings, val redirectErrorStream: Boolean)

  /**
   * Settings that define text to write to the process stdin.
   * @param input String to write to stdin.
   * @param inputFile File to write to stdin.
   */
  class InputStreamSettings(val input: String, val inputFile: File)

  /**
   * Settings that define text to capture from a process stream.
   * @param stringSize The number of characters to capture, or -1 for unlimited.
   * @param outputFile The file to write output to, or null to skip output.
   * @param outputFileAppend true if the output file should be appended to.
   */
  class OutputStreamSettings(val stringSize: Int, val outputFile: File, val outputFileAppend: Boolean)

  /**
   * The output of a process.
   * @param exitValue The exit value.
   * @param stdout The capture of stdout as defined by the stdout OutputStreamSettings.
   * @param stderr The capture of stderr as defined by the stderr OutputStreamSettings.
   */
  class ProcessOutput(val exitValue: Int, val stdout: StreamOutput, val stderr: StreamOutput)

  /**
   * The base class of stream output.
   */
  abstract class StreamOutput {
    /**
     * Returns the content as a string.
     * @return The content as a string.
     */
    def content: String

    /**
     * Returns true if the content was truncated.
     * @return true if the content was truncated.
     */
    def contentTruncated: Boolean
  }

  // Counter backing NEXT_OUTPUT_CAPTURE_ID.
  // NOTE(review): NEXT_OUTPUT_CAPTURE_ID is not referenced anywhere in the
  // visible code of this file -- possibly dead; TODO confirm before removing.
  private var currentCaptureId = 0

  /**
   * Returns the next output capture id.
   * @return The next output capture id.
   */
  private def NEXT_OUTPUT_CAPTURE_ID = {
    currentCaptureId += 1
    currentCaptureId
  }

  // Map keys used to pass streams between exec() and the capture threads.
  private val STDOUT_KEY = "stdout"
  private val STDERR_KEY = "stderr"

  /** Tracks running processes so that they can be killed as the JVM shuts down. */
  private val running = new HashSet[Process]()

  Runtime.getRuntime.addShutdownHook(new Thread {
    /** Kills running processes as the JVM shuts down. */
    override def run = for (process <- running.clone) {
      logger.warn("Killing: " + process)
      process.destroy
    }
  })

  /** Empty stream settings used when no output is requested. */
  private object EmptyStreamSettings extends OutputStreamSettings(0, null, false)

  /** Empty stream output when no output is captured due to an error. */
  private object EmptyStreamOutput extends StreamOutput {
    def content = ""
    def contentTruncated = false
  }

  /**
   * Stream output captured from a stream.
   * @param stream Stream to capture output.
   * @param settings Settings that define what to capture.
   */
  private class CapturedStreamOutput(val stream: InputStream, val settings: OutputStreamSettings) extends StreamOutput {
    /**
     * Returns the captured content as a string.
     * @return The captured content as a string.
     */
    def content = stringWriter.toString()

    /**
     * Returns true if the captured content was truncated.
     * @return true if the captured content was truncated.
     */
    def contentTruncated = stringTruncated

    /**
     * Drain the input stream to keep the process from backing up until it's empty.
     */
    def read() = {
      val reader = new InputStreamReader(stream)
      val buf = new Array[Char](4096)
      var readCount = 0
      // Each chunk is mirrored to both the in-memory string and the file.
      while ({readCount = reader.read(buf); readCount} >= 0) {
        writeString(buf, readCount)
        writeFile(buf, readCount)
      }
      closeFile()
      stream.close()
    }

    /** The string to write capture content. */
    private lazy val stringWriter = if (settings.stringSize < 0) new StringWriter else new StringWriter(settings.stringSize)

    /** True if the content is truncated. */
    private var stringTruncated = false

    /** The number of characters left until the buffer is full. */
    private var stringRemaining = settings.stringSize

    /**
     * Writes the buffer to the stringWriter up to stringRemaining characters.
     * @param chars Character buffer to write.
     * @param len Number of characters in the buffer.
     */
    private def writeString(chars: Array[Char], len: Int) = {
      if (settings.stringSize < 0) {
        // Negative stringSize means capture everything.
        stringWriter.write(chars, 0, len)
      } else {
        if (!stringTruncated) {
          // Write at most the remaining budget; once the budget goes negative
          // the capture is marked truncated and further writes are skipped.
          stringWriter.write(chars, 0, if (len > stringRemaining) stringRemaining else len)
          stringRemaining -= len
          if (stringRemaining < 0)
            stringTruncated = true
        }
      }
    }

    /** The file writer to capture content or null if no output file was requested. */
    private lazy val fileWriter = {
      if (settings.outputFile == null) {
        null
      } else {
        new FileWriter(settings.outputFile, settings.outputFileAppend)
      }
    }

    /**
     * Writes the buffer to the fileWriter if it is not null.
     * @param chars Character buffer to write.
     * @param len Number of characters in the buffer.
     */
    private def writeFile(chars: Array[Char], len: Int) = {
      if (fileWriter != null)
        fileWriter.write(chars, 0, len)
    }

    /** Closes the fileWriter if it is not null. */
    private def closeFile() = {
      if (fileWriter != null) {
        fileWriter.flush
        fileWriter.close
      }
    }
  }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.utils.text.XReadLines
import collection.mutable.ListBuffer
import collection.JavaConversions._
import java.io.File
/**
 * Helper for launching shell commands and tracking launched processes so they
 * can be destroyed if the JVM exits while they are still running.
 */
object ProcessUtils extends Logging {
  // Kill any still-running child processes as the JVM shuts down.
  Runtime.getRuntime.addShutdownHook(new Thread {
    override def run = for (process <- running.clone) {
      logger.warn("Killing: " + process)
      process.destroy
    }
  })

  /** Processes started by runCommandAndWait that have not yet completed. */
  // NOTE(review): mutated from the calling thread and read from the shutdown
  // hook without synchronization -- TODO confirm single-threaded use.
  val running = new ListBuffer[Process]()

  /**
   * Runs the command via "sh -c" in the given directory and waits for it to exit.
   * @param command Shell command line to run.
   * @param directory Working directory for the command.
   * @return The process exit code.
   */
  def runCommandAndWait(command: String, directory: File) = {
    logger.debug("Running command: " + command)
    var builder = new ProcessBuilder("sh", "-c", command).directory(directory)
    var process = builder.start
    running += process
    var result = process.waitFor
    running -= process
    // NOTE(review): stdout/stderr are only drained after waitFor, so a command
    // producing more output than the OS pipe buffer could block forever --
    // TODO confirm commands run here are low-output.
    if (logger.isDebugEnabled) {
      for (line <- new XReadLines(process.getInputStream).iterator) {
        logger.debug("command: " + line)
      }
      for (line <- new XReadLines(process.getErrorStream).iterator) {
        logger.error("command: " + line)
      }
    }
    logger.debug("Command exited with result: " + result)
    result
  }
}

View File

@ -2,67 +2,90 @@ package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.queue.QException
import java.lang.annotation.Annotation
import scala.concurrent.JavaConversions._
import java.lang.reflect.{ParameterizedType, Field}
import org.broadinstitute.sting.commandline.ClassType
import org.broadinstitute.sting.utils.classloader.JVMUtils
/**
* A collection of scala extensions to the Sting JVMUtils.
*/
object ReflectionUtils {
/**
 * Tests whether the given annotation is present on the field.
 * @param field Field to check.
 * @param annotation Class of the annotation to look for.
 * @return true if field has the annotation.
 */
def hasAnnotation(field: Field, annotation: Class[_ <: Annotation]) =
  Option(field.getAnnotation(annotation)).isDefined
/**
 * Fetches the annotation from the field, failing loudly when it is absent.
 * @param field Field to check.
 * @param annotation Class of the annotation to look for.
 * @return The annotation.
 */
def getAnnotation[T <: Annotation](field: Field, annotation: Class[T]): T =
  field.getAnnotation(annotation) match {
    case null => throw new QException("Field %s is missing annotation %s".format(field, annotation))
    case found => found
  }
/**
 * Returns all the declared fields on a class in order of sub type to super type.
 * @param clazz Base class to start looking for fields.
 * @return List[Field] found on the class and all super classes.
 */
def getAllFields(clazz: Class[_]) = getAllTypes(clazz).map(_.getDeclaredFields).flatMap(_.toList)

/** Returns only the fields that carry the given annotation. */
def filterFields(fields: List[Field], annotation: Class[_ <: Annotation]) = fields.filter(field => hasAnnotation(field, annotation))

/** Reads each field's value from obj via its scala getter method. */
def getFieldValues(obj: AnyRef, fields: List[Field]) = fields.map(field => fieldGetter(field).invoke(obj))
/**
 * Gets all the types on a class in order of sub type to super type.
 * @param clazz Base class.
 * @return List[Class] including the class and all super classes.
 */
def getAllTypes(clazz: Class[_]) = {
  var types = List.empty[Class[_]]
  // Walk the inheritance chain exactly once, from the class up to Object.
  // (The original text contained this loop twice -- a duplicated `var c`
  // declaration that cannot compile and would double every entry.)
  var c: Class[_] = clazz
  while (c != null) {
    types :+= c
    c = c.getSuperclass
  }
  types
}
def getValue(obj: AnyRef, field: Field) = fieldGetter(field).invoke(obj)
def setValue(obj: AnyRef, field: Field, value: Any) = fieldSetter(field).invoke(obj, value.asInstanceOf[AnyRef])
def addOrUpdateWithStringValue(obj: AnyRef, field: Field, value: String) = {
val getter = fieldGetter(field)
val setter = fieldSetter(field)
if (classOf[Seq[_]].isAssignableFrom(field.getType)) {
val fieldType = getCollectionType(field)
val typeValue = coerce(fieldType, value)
var list = getter.invoke(obj).asInstanceOf[Seq[_]]
list :+= typeValue
setter.invoke(obj, list)
} else if (classOf[Option[_]].isAssignableFrom(field.getType)) {
val fieldType = getCollectionType(field)
val typeValue = coerce(fieldType, value)
setter.invoke(obj, Some(typeValue))
} else {
val fieldType = field.getType
val typeValue = coerce(fieldType, value)
setter.invoke(obj, typeValue.asInstanceOf[AnyRef])
/**
* Gets a field value using reflection.
* Attempts to use the scala getter then falls back to directly accessing the field.
* @param obj Object to inspect.
* @param field Field to retrieve.
* @return The field value.
*/
def getValue(obj: AnyRef, field: Field): AnyRef =
try {
field.getDeclaringClass.getMethod(field.getName).invoke(obj)
} catch {
case e: NoSuchMethodException => JVMUtils.getFieldValue(field, obj)
}
}
/**
* Sets a field value using reflection.
* Attempts to use the scala setter then falls back to directly accessing the field.
* @param obj Object to inspect.
* @param field Field to set.
* @param value The new field value.
*/
def setValue(obj: AnyRef, field: Field, value: Any) =
try {
field.getDeclaringClass.getMethod(field.getName+"_$eq", field.getType).invoke(obj, value.asInstanceOf[AnyRef])
} catch {
case e: NoSuchMethodException => JVMUtils.setFieldValue(field, obj, value)
}
/**
* Returns the collection type of a field or throws an exception if the field contains more than one parameterized type, or the collection type cannot be found.
* @param field Field to retrieve the collection type.
* @return The collection type for the field.
*/
def getCollectionType(field: Field) = {
getGenericTypes(field) match {
case Some(classes) =>
@ -70,10 +93,15 @@ object ReflectionUtils {
throw new IllegalArgumentException("Field contains more than one generic type: " + field)
classes(0)
case None =>
throw new QException("Generic type not set for collection: " + field)
throw new QException("Generic type not set for collection. Did it declare an @ClassType?: " + field)
}
}
/**
* Returns the generic types for a field or None.
* @param field Field to retrieve the collection type.
* @return The array of classes that are in the collection type, or None if the type cannot be found.
*/
private def getGenericTypes(field: Field): Option[Array[Class[_]]] = {
// TODO: Refactor: based on java code in org.broadinstitute.sting.commandline.ArgumentTypeDescriptor
// If this is a parameterized collection, find the contained type. If blow up if only one type exists.
@ -85,39 +113,4 @@ object ReflectionUtils {
}
else None
}
/** Looks up the no-arg scala getter method generated for the field. */
private def fieldGetter(field: Field) = {
  val owner = field.getDeclaringClass
  try owner.getMethod(field.getName)
  catch {
    case e: NoSuchMethodException =>
      throw new QException("Field may be private? Unable to find getter for field: " + field)
  }
}

/** Looks up the scala setter method (name_$eq) generated for the field. */
private def fieldSetter(field: Field) = {
  val owner = field.getDeclaringClass
  try owner.getMethod(field.getName + "_$eq", field.getType)
  catch {
    case e: NoSuchMethodException =>
      throw new QException("Field may be a val instead of var? Unable to find setter for field: " + field)
  }
}
/**
 * Converts the string value to an instance of clazz.
 * Strings pass through, primitives use the scala string converters, and any
 * other type must declare a public single-String-argument constructor.
 */
private def coerce(clazz: Class[_], value: String) =
  clazz match {
    case c if c == classOf[String] => value
    case c if c == classOf[Boolean] => value.toBoolean
    case c if c == classOf[Byte] => value.toByte
    case c if c == classOf[Short] => value.toShort
    case c if c == classOf[Int] => value.toInt
    case c if c == classOf[Long] => value.toLong
    case c if c == classOf[Float] => value.toFloat
    case c if c == classOf[Double] => value.toDouble
    case c if hasStringConstructor(c) => c.getConstructor(classOf[String]).newInstance(value)
    case _ => throw new QException("Unable to coerce value '%s' to type '%s'.".format(value, clazz))
  }
/** Returns true if clazz declares a public constructor taking exactly one String. */
private def hasStringConstructor(clazz: Class[_]) =
  clazz.getConstructors.exists(_.getParameterTypes.toList == List(classOf[String]))
}

View File

@ -0,0 +1,71 @@
package org.broadinstitute.sting.queue.util
import collection.JavaConversions._
import org.broadinstitute.sting.queue.QException
import java.lang.Class
import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor}
/**
 * An ArgumentTypeDescriptor that can parse the scala collections.
 */
class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
  /**
   * Checks if the class type is a scala collection.
   * @param classType Class type to check.
   * @return true if the class is a List, Set, or an Option.
   */
  def supports(classType: Class[_]) = isCompound(classType)

  /**
   * Checks if the class type is a scala collection.
   * @param source Argument source to check.
   * @return true if the source is a List, Set, or an Option.
   */
  override def isMultiValued(source: ArgumentSource) = isCompound(source.field.getType)

  /**
   * Checks if the class type is a scala collection.
   * @param classType Class type to check.
   * @return true if the class is a List, Set, or an Option.
   */
  private def isCompound(classType: Class[_]) = {
    classOf[List[_]].isAssignableFrom(classType) ||
    classOf[Set[_]].isAssignableFrom(classType) ||
    classOf[Option[_]].isAssignableFrom(classType)
  }

  /**
   * Parses the argument matches based on the class type of the argument source's field.
   * @param source Argument source that contains the field being populated.
   * @param classType Class type being parsed.
   * @param argumentMatches The argument match strings that were found for this argument source.
   * @return The parsed object.
   */
  def parse(source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
    // The element type comes from the field's generic parameter (see
    // ReflectionUtils.getCollectionType), and a per-element parser handles it.
    val componentType = ReflectionUtils.getCollectionType(source.field)
    val componentArgumentParser = ArgumentTypeDescriptor.create(componentType)
    if (classOf[List[_]].isAssignableFrom(classType)) {
      // Parse each matched value individually, preserving match order.
      var list = List.empty[Any]
      for (argumentMatch <- argumentMatches)
        for (value <- argumentMatch)
          list :+= componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
      list
    } else if (classOf[Set[_]].isAssignableFrom(classType)) {
      // Same as the List branch, but duplicates collapse into the Set.
      var set = Set.empty[Any]
      for (argumentMatch <- argumentMatches)
        for (value <- argumentMatch)
          set += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
      set
    } else if (classOf[Option[_]].isAssignableFrom(classType)) {
      // An Option accepts at most one value.
      if (argumentMatches.size > 1)
        throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" "))
      else if (argumentMatches.size == 1)
        // NOTE(review): unlike the List/Set branches this passes the whole
        // argumentMatches through instead of rewrapping the single value --
        // TODO confirm this asymmetry is intentional.
        Some(componentArgumentParser.parse(source, componentType, argumentMatches))
      else
        None
    } else
      throw new QException("Unsupported compound argument type: " + classType)
  }
}

View File

@ -0,0 +1,37 @@
package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.queue.QException
/**
 * Runs a job on the command line by invoking "sh -c <command>"
 */
class ShellJob extends CommandLineJob with Logging {
  /**
   * Executes the configured command via "sh -c" and blocks until it completes,
   * throwing a QException on a non-zero exit code.
   */
  def run() = {
    assert(command != null, "Command was not set on job")

    // When no explicit error file was configured, merge stderr into stdout.
    val (mergeStderr, errFile) = if (this.errorFile == null) (true, null) else (false, this.errorFile)

    // Only buffer stdout in memory when debug logging will actually print it.
    val stdoutBuffer = if (logger.isDebugEnabled) FIVE_MB else 0

    val inSettings = new ProcessController.InputStreamSettings(null, this.inputFile)
    val outSettings = new ProcessController.OutputStreamSettings(stdoutBuffer, this.outputFile, true)
    val errSettings = new ProcessController.OutputStreamSettings(FIVE_MB, errFile, true)
    val settings = new ProcessController.ProcessSettings(
      Array("sh", "-c", command), null, this.workingDir, inSettings, outSettings, errSettings, mergeStderr)

    val result = processController.exec(settings)

    if (logger.isDebugEnabled) {
      logger.debug("output: " + content(result.stdout))
      logger.debug("error: " + content(result.stderr))
      logger.debug("Command exited with result: " + result.exitValue)
    }

    if (result.exitValue != 0) {
      logger.error("Failed to run job, got exit code %s. Standard error contained: %n%s"
        .format(result.exitValue, content(result.stderr)))
      throw new QException("Failed to run job, got exit code %s.".format(result.exitValue))
    }
  }
}

View File

@ -6,7 +6,9 @@
<artifact pattern="${repository.dir}/[organisation]/[artifact]-[revision].[ext]" />
<ivy pattern="${repository.dir}/[organisation]/[module]-[revision].xml" />
</filesystem>
<ibiblio name="libraries" m2compatible="true" usepoms="false" />
<ibiblio name="libraries" m2compatible="true" />
<ibiblio name="libraries_with_inconsistent_poms" checkconsistency="false" m2compatible="true" />
<ibiblio name="ukcomrobust-it" m2compatible="true" root="http://oss.sonatype.org/service/local/repositories/ukcomrobust-it-049/content" />
</resolvers>
<modules>
<module organisation="edu.mit.broad" resolver="projects" />
@ -15,5 +17,8 @@
<module organisation="org.reflections" module="reflections" resolver="projects" />
<module organisation="org.broad" module="tribble" resolver="projects" />
<module organisation="gov.nist" module="Jama" resolver="projects" />
<module organisation="uk.com.robust-it" module="cloning" resolver="ukcomrobust-it" />
<!-- If colt fixes the version in the pom for 1.2.0 then this line can be removed. -->
<module organisation="colt" module="colt" resolver="libraries_with_inconsistent_poms" />
</modules>
</ivysettings>

View File

@ -1,8 +0,0 @@
<ivy-module version="1.0">
<!-- .jar borrowed from https://svn.broadinstitute.org/CancerGenomeAnalysis/trunk/analysis_pipeline/process_management/lib/broad-core-all-2.8.jar -->
<info organisation="edu.mit.broad" module="broad-core-all" revision="2.8" status="integration" publication="20100522172500" />
<!-- BroadCore has a lot more dependencies. For now we are only using edu.mit.broad.core.lsf which depends on apache commons-logging -->
<dependencies>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1" />
</dependencies>
</ivy-module>

View File

@ -1,3 +1,12 @@
<ivy-module version="1.0">
<info organisation="org.reflections" module="reflections" revision="0.9.5-svnversion79M_mod2" status="release" />
<dependencies defaultconf="default">
<dependency org="javassist" name="javassist" rev="3.8.0.GA"/>
<dependency org="ch.qos.logback" name="logback-core" rev="0.9.9"/>
<dependency org="ch.qos.logback" name="logback-classic" rev="0.9.9"/>
<dependency org="org.slf4j" name="slf4j-api" rev="1.5.6"/>
<dependency org="xml-apis" name="xml-apis" rev="1.0.b2"/>
<dependency org="com.google.collections" name="google-collections" rev="1.0"/>
<dependency org="dom4j" name="dom4j" rev="1.6"/>
</dependencies>
</ivy-module>