diff --git a/build.xml b/build.xml
index dda03c288..7f69a3a12 100644
--- a/build.xml
+++ b/build.xml
@@ -7,7 +7,14 @@
+
+
+
+
+
+
+
@@ -24,9 +31,7 @@
-
-
@@ -55,21 +60,28 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
+
+
-
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+ Building Queue...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Generating Queue GATK extensions...
+
+
+
+
+ Building Queue GATK extensions...
+
+
+
+
+
+
+
+
+
+
+ additionalparam="-build-timestamp "${build.timestamp}" -version-suffix .${build.version} -out ${basedir}/${resource.path}">
-
+
+
+
@@ -141,14 +249,20 @@
+
+
+
+
+
@@ -193,12 +307,46 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -206,12 +354,6 @@
-
-
-
-
-
-
@@ -232,29 +374,57 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
+
+
+
+
+
+
+
+
@@ -300,46 +470,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Building Queue...
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/ivy.xml b/ivy.xml
index 3c9e6a4b0..cebf26a86 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -8,46 +8,40 @@
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
-
+
-
+
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
+
+
-
-
+
@@ -56,5 +50,8 @@
+
+
+
diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
index 143c3e6d3..91cbf5f8c 100755
--- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@@ -25,6 +25,7 @@
package org.broadinstitute.sting.analyzecovariates;
+import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.*;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
@@ -51,7 +52,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
// Command Line Arguments
/////////////////////////////
- @Argument(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
+ @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java b/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
index f206aac58..000d540fc 100644
--- a/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
@@ -40,6 +40,11 @@ public class ArgumentDefinition {
*/
public final ArgumentIOType ioType;
+ /**
+ * The class of the argument.
+ */
+ public final Class argumentType;
+
/**
* Full name of the argument. Must have a value.
*/
@@ -70,6 +75,11 @@ public class ArgumentDefinition {
*/
public final boolean isMultiValued;
+ /**
+ * The class of the componentType. Not used for scalars.
+ */
+ public final Class componentType;
+
/**
* Is this argument hidden from the help system?
*/
@@ -93,35 +103,41 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param ioType Whether the argument is an input or an output.
+ * @param argumentType The class of the field.
* @param fullName Full name for this argument definition.
* @param shortName Short name for this argument definition.
* @param doc Doc string for this argument.
* @param required Whether or not this argument is required.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
+ * @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param exclusiveOf Whether this command line argument is mutually exclusive of other arguments.
* @param validation A regular expression for command-line argument validation.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( ArgumentIOType ioType,
+ Class argumentType,
String fullName,
String shortName,
String doc,
boolean required,
boolean isFlag,
boolean isMultiValued,
+ Class componentType,
boolean isHidden,
String exclusiveOf,
String validation,
List validOptions) {
this.ioType = ioType;
+ this.argumentType = argumentType;
this.fullName = fullName;
this.shortName = shortName;
this.doc = doc;
this.required = required;
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
+ this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = exclusiveOf;
this.validation = validation;
@@ -131,18 +147,22 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param annotation The annotation on the field.
+ * @param argumentType The class of the field.
* @param defaultFullName Default full name for this argument definition.
* @param defaultShortName Default short name for this argument definition.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
+ * @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( Annotation annotation,
+ Class argumentType,
String defaultFullName,
String defaultShortName,
boolean isFlag,
boolean isMultiValued,
+ Class componentType,
boolean isHidden,
List validOptions) {
@@ -162,13 +182,15 @@ public class ArgumentDefinition {
else
shortName = null;
- this.ioType = getIOType(annotation);
+ this.ioType = ArgumentIOType.getIOType(annotation);
+ this.argumentType = argumentType;
this.fullName = fullName;
this.shortName = shortName;
this.doc = getDoc(annotation);
this.required = isRequired(annotation, isFlag);
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
+ this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = getExclusiveOf(annotation);
this.validation = getValidationRegex(annotation);
@@ -178,25 +200,31 @@ public class ArgumentDefinition {
/**
* Creates a new argument definition.
* @param annotation The annotation on the field.
+ * @param argumentType The class of the field.
* @param fieldName Default full name for this argument definition.
* @param isFlag Whether or not this argument should be treated as a flag.
* @param isMultiValued Whether or not this argument supports multiple values.
+ * @param componentType For multivalued arguments the type of the components.
* @param isHidden Whether or not this argument should be hidden from the command-line argument system.
* @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null.
*/
public ArgumentDefinition( Annotation annotation,
+ Class argumentType,
String fieldName,
boolean isFlag,
boolean isMultiValued,
+ Class componentType,
boolean isHidden,
List validOptions) {
- this.ioType = getIOType(annotation);
+ this.ioType = ArgumentIOType.getIOType(annotation);
+ this.argumentType = argumentType;
this.fullName = getFullName(annotation, fieldName);
this.shortName = getShortName(annotation);
this.doc = getDoc(annotation);
this.required = isRequired(annotation, isFlag);
this.isFlag = isFlag;
this.isMultiValued = isMultiValued;
+ this.componentType = componentType;
this.isHidden = isHidden;
this.exclusiveOf = getExclusiveOf(annotation);
this.validation = getValidationRegex(annotation);
@@ -222,17 +250,6 @@ public class ArgumentDefinition {
Utils.equals(shortName,other.shortName);
}
- /**
- * Returns the ArgumentIOType for the annotation.
- * @param annotation @Input or @Output
- * @return ArgumentIOType.Input, Output, or Unknown
- */
- public static ArgumentIOType getIOType(Annotation annotation) {
- if (annotation instanceof Input) return ArgumentIOType.INPUT;
- if (annotation instanceof Output) return ArgumentIOType.OUTPUT;
- return ArgumentIOType.UNKNOWN;
- }
-
/**
* A hack to get around the fact that Java doesn't like inheritance in Annotations.
* @param annotation to run the method on
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java b/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java
index af516004a..03e3066fb 100644
--- a/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java
@@ -24,6 +24,28 @@
package org.broadinstitute.sting.commandline;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.lang.annotation.Annotation;
+
public enum ArgumentIOType {
- INPUT, OUTPUT, UNKNOWN
+ INPUT(Input.class), OUTPUT(Output.class), ARGUMENT(Argument.class);
+
+ public final Class extends Annotation> annotationClass;
+
+ ArgumentIOType(Class extends Annotation> annotationClass) {
+ this.annotationClass = annotationClass;
+ }
+
+ /**
+ * Returns the ArgumentIOType for the annotation.
+ * @param annotation @Input, @Output, or @Argument
+ * @return ArgumentIOType.INPUT, OUTPUT, or ARGUMENT; throws StingException for an unknown annotation type
+ */
+ public static ArgumentIOType getIOType(Annotation annotation) {
+ for (ArgumentIOType ioType: ArgumentIOType.values())
+ if (ioType.annotationClass.isAssignableFrom(annotation.getClass()))
+ return ioType;
+ throw new StingException("Unknown annotation type: " + annotation);
+ }
}
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
new file mode 100755
index 000000000..56bedc012
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import java.util.*;
+
+/**
+ * A mapping of all the sites where an argument definition maps to a site on the command line.
+ */
+public class ArgumentMatch implements Iterable {
+ /**
+ * The argument definition that's been matched.
+ */
+ public final ArgumentDefinition definition;
+
+ /**
+ * The text that's been matched, as it appears in the command line arguments.
+ */
+ public final String label;
+
+ /**
+ * Maps indices of command line arguments to values paired with that argument.
+ */
+ public final SortedMap> indices = new TreeMap>();
+
+ /**
+ * Create a new argument match, defining its properties later. Used to create invalid arguments.
+ */
+ public ArgumentMatch() {
+ this.label = null;
+ this.definition = null;
+ }
+
+ /**
+ * A simple way of indicating that an argument with the given label and definition exists at this index.
+ * @param label Label of the argument match. Must not be null.
+ * @param definition The associated definition, if one exists. May be null.
+ * @param index Position of the argument on the command line.
+ */
+ public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
+ this( label, definition, index, null );
+ }
+
+ private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
+ this.label = label;
+ this.definition = definition;
+
+ ArrayList values = new ArrayList();
+ if( value != null )
+ values.add(value);
+ indices.put(index,values );
+ }
+
+ /**
+ * Return a string representation of the given argument match, for debugging purposes.
+ * @return String representation of the match.
+ */
+ public String toString() {
+ return label;
+ }
+
+ /**
+ * Creates an iterator that walks over each individual match at each position of a given argument.
+ * @return An iterator over the individual matches in this argument. Will not be null.
+ */
+ public Iterator iterator() {
+ return new Iterator() {
+ /**
+ * Iterate over each available index.
+ */
+ private Iterator indexIterator = null;
+
+ /**
+ * Iterate over each available token.
+ */
+ private Iterator tokenIterator = null;
+
+ /**
+ * The next index to return. Null if none remain.
+ */
+ Integer nextIndex = null;
+
+ /**
+ * The next token to return. Null if none remain.
+ */
+ String nextToken = null;
+
+ {
+ indexIterator = indices.keySet().iterator();
+ prepareNext();
+ }
+
+ /**
+ * Is there a nextToken available to return?
+ * @return True if there's another token waiting in the wings. False otherwise.
+ */
+ public boolean hasNext() {
+ return nextToken != null;
+ }
+
+ /**
+ * Get the next token, if one exists. If not, throw an IllegalStateException.
+ * @return The next ArgumentMatch in the series. Should never be null.
+ */
+ public ArgumentMatch next() {
+ if( nextIndex == null || nextToken == null )
+ throw new IllegalStateException( "No more ArgumentMatches are available" );
+
+ ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
+ prepareNext();
+ return match;
+ }
+
+ /**
+ * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
+ * initialize nextIndex / nextToken to null.
+ */
+ private void prepareNext() {
+ if( tokenIterator != null && tokenIterator.hasNext() ) {
+ nextToken = tokenIterator.next();
+ }
+ else {
+ nextIndex = null;
+ nextToken = null;
+
+ // Do a nested loop. While more data is present in the inner loop, grab that data.
+ // Otherwise, troll the outer iterator looking for more data.
+ while( indexIterator.hasNext() ) {
+ nextIndex = indexIterator.next();
+ if( indices.get(nextIndex) != null ) {
+ tokenIterator = indices.get(nextIndex).iterator();
+ if( tokenIterator.hasNext() ) {
+ nextToken = tokenIterator.next();
+ break;
+ }
+ }
+ }
+ }
+
+ }
+
+ /**
+ * Remove is unsupported in this context.
+ */
+ public void remove() {
+ throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
+ }
+ };
+ }
+
+ /**
+ * Merge two ArgumentMatches, so that the values for all arguments go into the
+ * same data structure.
+ * @param other The other match to merge into.
+ */
+ public void mergeInto( ArgumentMatch other ) {
+ indices.putAll(other.indices);
+ }
+
+ /**
+ * Associate a value with this merge mapping.
+ * @param index index of the command-line argument to which this value is mated.
+ * @param value Text representation of value to add.
+ */
+ public void addValue( int index, String value ) {
+ if( !indices.containsKey(index) || indices.get(index) == null )
+ indices.put(index, new ArrayList() );
+ indices.get(index).add(value);
+ }
+
+ /**
+ * Does this argument already have a value at the given site?
+ * Arguments are only allowed to be single-valued per site, and
+ * flags aren't allowed a value at all.
+ * @param index Index at which to check for values.
+ * @return True if the argument has a value at the given site. False otherwise.
+ */
+ public boolean hasValueAtSite( int index ) {
+ return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
+ }
+
+ /**
+ * Return the values associated with this argument match.
+ * @return A collection of the string representations of these values.
+ */
+ public List values() {
+ List values = new ArrayList();
+ for( int index: indices.keySet() ) {
+ if( indices.get(index) != null )
+ values.addAll(indices.get(index));
+ }
+ return values;
+ }
+
+ /**
+ * Convenience method returning true if the definition is a flag.
+ * @return True if definition is known to be a flag; false if not known to be a flag.
+ */
+ private boolean isArgumentFlag() {
+ return definition != null && definition.isFlag;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
index 3ee544c5f..03978adac 100755
--- a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java
@@ -192,200 +192,3 @@ public class ArgumentMatches implements Iterable {
return new HashSet( argumentMatches.values() );
}
}
-
-/**
- * A mapping of all the sites where an argument definition maps to a site on the command line.
- */
-class ArgumentMatch implements Iterable {
- /**
- * The argument definition that's been matched.
- */
- public final ArgumentDefinition definition;
-
- /**
- * The text that's been matched, as it appears in the command line arguments.
- */
- public final String label;
-
- /**
- * Maps indicies of command line arguments to values paired with that argument.
- */
- public final SortedMap> indices = new TreeMap>();
-
- /**
- * Create a new argument match, defining its properties later. Used to create invalid arguments.
- */
- public ArgumentMatch() {
- this.label = null;
- this.definition = null;
- }
-
- /**
- * A simple way of indicating that an argument with the given label and definition exists at this index.
- * @param label Label of the argument match. Must not be null.
- * @param definition The associated definition, if one exists. May be null.
- * @param index Position of the argument. Must not be null.
- */
- public ArgumentMatch( String label, ArgumentDefinition definition, int index ) {
- this( label, definition, index, null );
- }
-
- private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) {
- this.label = label;
- this.definition = definition;
-
- ArrayList values = new ArrayList();
- if( value != null )
- values.add(value);
- indices.put(index,values );
- }
-
- /**
- * Return a string representation of the given argument match, for debugging purposes.
- * @return String representation of the match.
- */
- public String toString() {
- return label;
- }
-
- /**
- * Creates an iterator that walks over each individual match at each position of a given argument.
- * @return An iterator over the individual matches in this argument. Will not be null.
- */
- public Iterator iterator() {
- return new Iterator() {
- /**
- * Iterate over each the available index.
- */
- private Iterator indexIterator = null;
-
- /**
- * Iterate over each available token.
- */
- private Iterator tokenIterator = null;
-
- /**
- * The next index to return. Null if none remain.
- */
- Integer nextIndex = null;
-
- /**
- * The next token to return. Null if none remain.
- */
- String nextToken = null;
-
- {
- indexIterator = indices.keySet().iterator();
- prepareNext();
- }
-
- /**
- * Is there a nextToken available to return?
- * @return True if there's another token waiting in the wings. False otherwise.
- */
- public boolean hasNext() {
- return nextToken != null;
- }
-
- /**
- * Get the next token, if one exists. If not, throw an IllegalStateException.
- * @return The next ArgumentMatch in the series. Should never be null.
- */
- public ArgumentMatch next() {
- if( nextIndex == null || nextToken == null )
- throw new IllegalStateException( "No more ArgumentMatches are available" );
-
- ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken );
- prepareNext();
- return match;
- }
-
- /**
- * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
- * initialize nextIndex / nextToken to null.
- */
- private void prepareNext() {
- if( tokenIterator != null && tokenIterator.hasNext() ) {
- nextToken = tokenIterator.next();
- }
- else {
- nextIndex = null;
- nextToken = null;
-
- // Do a nested loop. While more data is present in the inner loop, grab that data.
- // Otherwise, troll the outer iterator looking for more data.
- while( indexIterator.hasNext() ) {
- nextIndex = indexIterator.next();
- if( indices.get(nextIndex) != null ) {
- tokenIterator = indices.get(nextIndex).iterator();
- if( tokenIterator.hasNext() ) {
- nextToken = tokenIterator.next();
- break;
- }
- }
- }
- }
-
- }
-
- /**
- * Remove is unsupported in this context.
- */
- public void remove() {
- throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating.");
- }
- };
- }
-
- /**
- * Merge two ArgumentMatches, so that the values for all arguments go into the
- * same data structure.
- * @param other The other match to merge into.
- */
- public void mergeInto( ArgumentMatch other ) {
- indices.putAll(other.indices);
- }
-
- /**
- * Associate a value with this merge maapping.
- * @param index index of the command-line argument to which this value is mated.
- * @param value Text representation of value to add.
- */
- public void addValue( int index, String value ) {
- if( !indices.containsKey(index) || indices.get(index) == null )
- indices.put(index, new ArrayList() );
- indices.get(index).add(value);
- }
-
- /**
- * Does this argument already have a value at the given site?
- * Arguments are only allowed to be single-valued per site, and
- * flags aren't allowed a value at all.
- * @param index Index at which to check for values.
- * @return True if the argument has a value at the given site. False otherwise.
- */
- public boolean hasValueAtSite( int index ) {
- return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
- }
-
- /**
- * Return the values associated with this argument match.
- * @return A collection of the string representation of these value.
- */
- public List values() {
- List values = new ArrayList();
- for( int index: indices.keySet() ) {
- if( indices.get(index) != null )
- values.addAll(indices.get(index));
- }
- return values;
- }
-
- /**
- * Convenience method returning true if the definition is a flag.
- * @return True if definition is known to be a flag; false if not known to be a flag.
- */
- private boolean isArgumentFlag() {
- return definition != null && definition.isFlag;
- }
-}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
index 182b1c8a3..635780aa5 100644
--- a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
@@ -28,7 +28,7 @@ package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.gatk.walkers.Hidden;
import java.lang.reflect.Field;
-import java.util.Collection;
+import java.util.Arrays;
import java.util.List;
/**
@@ -41,9 +41,9 @@ import java.util.List;
*/
public class ArgumentSource {
/**
- * Class to which the field belongs.
+ * Parent fields containing this field; each is annotated with @ArgumentCollection.
*/
- public final Class clazz;
+ public final Field[] parentFields;
/**
* Field into which to inject command-line arguments.
@@ -57,11 +57,19 @@ public class ArgumentSource {
/**
* Create a new command-line argument target.
- * @param clazz Class containing the argument.
- * @param field Field containing the argument. Field must be annotated with 'Argument'.
+ * @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'.
*/
- public ArgumentSource( Class clazz, Field field ) {
- this.clazz = clazz;
+ public ArgumentSource( Field field ) {
+ this(new Field[0], field);
+ }
+
+ /**
+ * Create a new command-line argument target.
+ * @param parentFields Parent fields containing the field. Field must be annotated with 'ArgumentCollection'.
+ * @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'.
+ */
+ public ArgumentSource( Field[] parentFields, Field field ) {
+ this.parentFields = parentFields;
this.field = field;
this.typeDescriptor = ArgumentTypeDescriptor.create( field.getType() );
}
@@ -80,7 +88,7 @@ public class ArgumentSource {
return false;
ArgumentSource otherArgumentSource = (ArgumentSource)other;
- return this.clazz.equals(otherArgumentSource.clazz) && this.field.equals(otherArgumentSource.field);
+ return this.field == otherArgumentSource.field && Arrays.equals(this.parentFields, otherArgumentSource.parentFields);
}
/**
@@ -89,7 +97,7 @@ public class ArgumentSource {
*/
@Override
public int hashCode() {
- return clazz.hashCode() ^ field.hashCode();
+ return field.hashCode();
}
/**
@@ -118,18 +126,11 @@ public class ArgumentSource {
/**
* Parses the specified value based on the specified type.
- * @param source The type of value to be parsed.
* @param values String representation of all values passed.
* @return the parsed value of the object.
*/
- public Object parse( ArgumentSource source, ArgumentMatches values ) {
- Object value = null;
- if( !isFlag() )
- value = typeDescriptor.parse( source, values );
- else
- value = true;
-
- return value;
+ public Object parse( ArgumentMatches values ) {
+ return typeDescriptor.parse( this, values );
}
/**
@@ -145,8 +146,7 @@ public class ArgumentSource {
* @return True if the argument supports multiple values.
*/
public boolean isMultiValued() {
- Class argumentType = field.getType();
- return Collection.class.isAssignableFrom(argumentType) || field.getType().isArray();
+ return typeDescriptor.isMultiValued( this );
}
/**
@@ -162,6 +162,6 @@ public class ArgumentSource {
* @return String representation of the argument source.
*/
public String toString() {
- return clazz.getSimpleName() + ": " + field.getName();
+ return field.getDeclaringClass().getSimpleName() + ": " + field.getName();
}
}
diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 02a46b69d..4993ebfe5 100644
--- a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -113,10 +113,26 @@ public abstract class ArgumentTypeDescriptor {
return Collections.singletonList(createDefaultArgumentDefinition(source));
}
+ /**
+ * Parses an argument source to an object.
+ * @param source The source used to find the matches.
+ * @param matches The matches for the source.
+ * @return The parsed object.
+ */
public Object parse( ArgumentSource source, ArgumentMatches matches ) {
return parse( source, source.field.getType(), matches );
}
+ /**
+ * Returns true if the field is a collection or an array.
+ * @param source The argument source to check.
+ * @return true if the field is a collection or an array.
+ */
+ public boolean isMultiValued( ArgumentSource source ) {
+ Class argumentType = source.field.getType();
+ return Collection.class.isAssignableFrom(argumentType) || argumentType.isArray();
+ }
+
/**
* By default, argument sources create argument definitions with a set of default values.
* Use this method to create the one simple argument definition.
@@ -125,15 +141,41 @@ public abstract class ArgumentTypeDescriptor {
*/
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
return new ArgumentDefinition( getArgumentAnnotation(source),
+ source.field.getType(),
source.field.getName(),
source.isFlag(),
source.isMultiValued(),
+ getCollectionComponentType(source.field),
source.isHidden(),
getValidOptions(source) );
}
- public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches );
+ /**
+ * Return the component type of a field, or String.class if the type cannot be found.
+ * @param field The reflected field to inspect.
+ * @return The parameterized component type, or String.class if the parameterized type could not be found.
+ * @throws IllegalArgumentException If more than one parameterized type is found on the field.
+ */
+ protected Class getCollectionComponentType( Field field ) {
+ // If this is a parameterized collection, find the contained type. Blow up if more than one type exists.
+ if( field.getGenericType() instanceof ParameterizedType) {
+ ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
+ if( parameterizedType.getActualTypeArguments().length > 1 )
+ throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
+ return (Class)parameterizedType.getActualTypeArguments()[0];
+ }
+ else
+ return String.class;
+ }
+ /**
+ * Parses the argument matches for a class type into an object.
+ * @param source The original argument source used to find the matches.
+ * @param type The current class type being inspected. May not match the argument source.field.getType() if this is a collection, for example.
+ * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
+ * @return The individual parsed object matching the argument match with Class type.
+ */
+ public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches );
/**
* If the argument source only accepts a small set of options, populate the returned list with
@@ -193,6 +235,11 @@ public abstract class ArgumentTypeDescriptor {
throw new StingException("ArgumentAnnotation is not present for the argument field: " + source.field.getName());
}
+ /**
+ * Returns true if an argument annotation is present
+ * @param field The field to check for an annotation.
+ * @return True if an argument annotation is present on the field.
+ */
@SuppressWarnings("unchecked")
public static boolean isArgumentAnnotationPresent(Field field) {
for (Class annotation: ARGUMENT_ANNOTATIONS)
@@ -235,6 +282,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) {
+ if (source.isFlag())
+ return true;
String value = getArgumentValue( createDefaultArgumentDefinition(source), matches );
// lets go through the types we support
@@ -301,7 +350,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public boolean supports( Class type ) {
return ( Collection.class.isAssignableFrom(type) || type.isArray() );
}
-
+
@Override
@SuppressWarnings("unchecked")
public Object parse( ArgumentSource source, Class type, ArgumentMatches matches )
@@ -319,16 +368,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
else if( java.util.Set.class.isAssignableFrom(type) ) type = java.util.TreeSet.class;
}
- // If this is a parameterized collection, find the contained type. If blow up if only one type exists.
- if( source.field.getGenericType() instanceof ParameterizedType) {
- ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType();
- if( parameterizedType.getActualTypeArguments().length > 1 )
- throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString());
- componentType = (Class)parameterizedType.getActualTypeArguments()[0];
- }
- else
- componentType = String.class;
-
+ componentType = getCollectionComponentType( source.field );
ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType );
Collection collection;
diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
index bedca9043..89fd143e2 100644
--- a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
+++ b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java
@@ -43,11 +43,11 @@ public abstract class CommandLineProgram {
private static Logger logger = Logger.getRootLogger();
/** the default log level */
- @Input(fullName = "logging_level",
+ @Argument(fullName = "logging_level",
shortName = "l",
doc = "Set the minimum level of logging, i.e. setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging.",
required = false)
- protected String logging_level = "WARN";
+ protected String logging_level = "INFO";
/** where to send the output of our logger */
@@ -58,21 +58,21 @@ public abstract class CommandLineProgram {
protected String toFile = null;
/** do we want to silence the command line output */
- @Input(fullName = "quiet_output_mode",
+ @Argument(fullName = "quiet_output_mode",
shortName = "quiet",
doc = "Set the logging to quiet mode, no output to stdout",
required = false)
protected Boolean quietMode = false;
/** do we want to generate debugging information with the logs */
- @Input(fullName = "debug_mode",
+ @Argument(fullName = "debug_mode",
shortName = "debug",
doc = "Set the logging file string to include a lot of debugging information (SLOW!)",
required = false)
protected Boolean debugMode = false;
/** this is used to indicate if they've asked for help */
- @Input(fullName = "help", shortName = "h", doc = "Generate this help message", required = false)
+ @Argument(fullName = "help", shortName = "h", doc = "Generate this help message", required = false)
public Boolean help = false;
/** our logging output patterns */
@@ -146,6 +146,7 @@ public abstract class CommandLineProgram {
* @param clp the command line program to execute
* @param args the command line arguments passed in
*/
+ @SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) {
try {
@@ -174,14 +175,14 @@ public abstract class CommandLineProgram {
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parser.parse(args);
- if (isHelpPresent(clp, parser))
+ if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
} else {
parser.parse(args);
- if (isHelpPresent(clp, parser))
+ if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
@@ -216,7 +217,7 @@ public abstract class CommandLineProgram {
// if they specify a log location, output our data there
if (clp.toFile != null) {
- FileAppender appender = null;
+ FileAppender appender;
try {
appender = new FileAppender(layout, clp.toFile, false);
logger.addAppender(appender);
@@ -258,7 +259,7 @@ public abstract class CommandLineProgram {
*/
private static void toErrorLog(CommandLineProgram clp, Exception e) {
File logFile = new File("GATK_Error.log");
- PrintStream stream = null;
+ PrintStream stream;
try {
stream = new PrintStream(logFile);
} catch (Exception e1) { // catch all the exceptions here, if we can't create the file, do the alternate path
@@ -279,22 +280,12 @@ public abstract class CommandLineProgram {
parser.loadArgumentsIntoObject(obj);
}
- /**
- * a manual way to load argument providing objects into the program
- *
- * @param clp the command line program
- * @param cls the class to load the arguments off of
- */
- public void loadAdditionalSource(CommandLineProgram clp, Class cls) {
- parser.addArgumentSource(clp.getArgumentSourceName(cls), cls);
- }
-
/**
* this function checks the logger level passed in on the command line, taking the lowest
* level that was provided.
*/
private void setupLoggerLevel() {
- Level par = Level.WARN;
+ Level par;
if (logging_level.toUpperCase().equals("DEBUG")) {
par = Level.DEBUG;
} else if (logging_level.toUpperCase().equals("ERROR")) {
@@ -316,9 +307,9 @@ public abstract class CommandLineProgram {
}
/**
- * a function used to indicate an error occured in the command line tool
+ * a function used to indicate an error occurred in the command line tool
*
- * @param msg
+ * @param msg message to display
*/
private static void printExitSystemMsg(final String msg) {
System.out.printf("The following error has occurred:%n%n");
@@ -334,12 +325,11 @@ public abstract class CommandLineProgram {
/**
* Do a cursory search for the given argument.
*
- * @param clp Instance of the command-line program.
* @param parser Parser
*
* @return True if help is present; false otherwise.
*/
- private static boolean isHelpPresent(CommandLineProgram clp, ParsingEngine parser) {
+ private static boolean isHelpPresent(ParsingEngine parser) {
return parser.isArgumentPresent("help");
}
diff --git a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
index 2055faea9..d48123a4d 100755
--- a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
+++ b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java
@@ -270,26 +270,38 @@ public class ParsingEngine {
return;
// Target instance into which to inject the value.
- List targets = new ArrayList();
-
- // Check to see whether the instance itself can be the target.
- if( source.clazz.isAssignableFrom(instance.getClass()) ) {
- targets.add(instance);
- }
-
- // Check to see whether a contained class can be the target.
- targets.addAll(getContainersMatching(instance,source.clazz));
+ Collection targets = findTargets( source, instance );
// Abort if no home is found for the object.
if( targets.size() == 0 )
throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches);
for( Object target: targets ) {
- Object value = (argumentMatches.size() != 0) ? source.parse(source,argumentMatches) : source.getDefault();
+ Object value = (argumentMatches.size() != 0) ? source.parse(argumentMatches) : source.getDefault();
JVMUtils.setFieldValue(source.field,target,value);
}
}
+ /**
+ * Gets a collection of the container instances of the given type stored within the given target.
+ * @param source Argument source.
+ * @param instance Container.
+ * @return A collection of containers matching the given argument source.
+ */
+ private Collection findTargets(ArgumentSource source, Object instance) {
+ LinkedHashSet targets = new LinkedHashSet();
+ for( Class clazz = instance.getClass(); clazz != null; clazz = clazz.getSuperclass() ) {
+ for( Field field: clazz.getDeclaredFields() ) {
+ if( field.equals(source.field) ) {
+ targets.add(instance);
+ } else if( field.isAnnotationPresent(ArgumentCollection.class) ) {
+ targets.addAll(findTargets(source, JVMUtils.getFieldValue(field, instance)));
+ }
+ }
+ }
+ return targets;
+ }
+
/**
* Prints out the help associated with these command-line argument definitions.
* @param applicationDetails Details about the specific GATK-based application being run.
@@ -303,15 +315,22 @@ public class ParsingEngine {
* @param sourceClass class to act as sources for other arguments.
* @return A list of sources associated with this object and its aggregated objects.
*/
- protected static List extractArgumentSources(Class sourceClass) {
+ public static List extractArgumentSources(Class sourceClass) {
+ return extractArgumentSources(sourceClass, new Field[0]);
+ }
+
+ private static List extractArgumentSources(Class sourceClass, Field[] parentFields) {
List argumentSources = new ArrayList();
while( sourceClass != null ) {
Field[] fields = sourceClass.getDeclaredFields();
for( Field field: fields ) {
if( ArgumentTypeDescriptor.isArgumentAnnotationPresent(field) )
- argumentSources.add( new ArgumentSource(sourceClass,field) );
- if( field.isAnnotationPresent(ArgumentCollection.class) )
- argumentSources.addAll( extractArgumentSources(field.getType()) );
+ argumentSources.add( new ArgumentSource(parentFields, field) );
+ if( field.isAnnotationPresent(ArgumentCollection.class) ) {
+ Field[] newParentFields = Arrays.copyOf(parentFields, parentFields.length + 1);
+ newParentFields[parentFields.length] = field;
+ argumentSources.addAll( extractArgumentSources(field.getType(), newParentFields) );
+ }
}
sourceClass = sourceClass.getSuperclass();
}
@@ -350,24 +369,6 @@ public class ParsingEngine {
// No parse results found.
return null;
}
-
- /**
- * Gets a list of the container instances of the given type stored within the given target.
- * @param target Class holding the container.
- * @param type Container type.
- * @return A list of containers matching the given type.
- */
- private List getContainersMatching(Object target, Class<?> type) {
- List containers = new ArrayList();
-
- Field[] fields = target.getClass().getDeclaredFields();
- for( Field field: fields ) {
- if( field.isAnnotationPresent(ArgumentCollection.class) && type.isAssignableFrom(field.getType()) )
- containers.add(JVMUtils.getFieldValue(field,target));
- }
-
- return containers;
- }
}
/**
diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index ce8b23d44..b13c26cb1 100755
--- a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
-import org.broadinstitute.sting.gatk.GATKErrorReport;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.commandline.*;
@@ -135,7 +134,7 @@ public class CommandLineGATK extends CommandLineExecutable {
* @return A string summarizing the walkers available in this distribution.
*/
private String getAdditionalHelp() {
- String additionalHelp = "";
+ String additionalHelp;
// If no analysis name is present, fill in extra help on the walkers.
WalkerManager walkerManager = GATKEngine.getWalkerManager();
@@ -152,7 +151,7 @@ public class CommandLineGATK extends CommandLineExecutable {
private static final int WALKER_INDENT = 3;
private static final String FIELD_SEPARATOR = " ";
- private String getWalkerHelp(Class walkerType) {
+ private String getWalkerHelp(Class<? extends Walker> walkerType) {
// Construct a help string to output details on this walker.
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
index 588d56a19..d8d4a7861 100755
--- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@@ -40,19 +40,10 @@ import org.broadinstitute.sting.utils.help.SummaryTaglet;
import java.util.*;
/**
- * Created by IntelliJ IDEA.
- * User: hanna
- * Date: Mar 17, 2009
- * Time: 3:14:28 PM
- * To change this template use File | Settings | File Templates.
+ * Plugin manager that also provides various utilities for inspecting Walkers.
*/
public class WalkerManager extends PluginManager {
- /**
- * our log, which we want to capture anything from this class
- */
- private static Logger logger = Logger.getLogger(WalkerManager.class);
-
/**
* A collection of help text for walkers and their enclosing packages.
*/
@@ -92,7 +83,7 @@ public class WalkerManager extends PluginManager {
public String getPackageDisplayName(String packageName) {
// Try to find an override for the display name of this package.
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
- String displayName = null;
+ String displayName;
if(helpText.containsKey(displayNameKey)) {
displayName = helpText.getString(displayNameKey);
}
@@ -130,6 +121,15 @@ public class WalkerManager extends PluginManager {
return helpText.getString(walkerSummary);
}
+ /**
+ * Gets the summary help text associated with a given walker type.
+ * @param walker Walker for which to search for help text.
+ * @return Walker summary description, or "" if none exists.
+ */
+ public String getWalkerSummaryText(Walker walker) {
+ return getWalkerSummaryText(walker.getClass());
+ }
+
/**
* Gets the descriptive help text associated with a given walker type.
* @param walkerType Type of walker for which to search for help text.
@@ -142,13 +142,34 @@ public class WalkerManager extends PluginManager {
return helpText.getString(walkerDescription);
}
+ /**
+ * Gets the descriptive help text associated with a given walker type.
+ * @param walker Walker for which to search for help text.
+ * @return Walker full description, or "" if none exists.
+ */
+ public String getWalkerDescriptionText(Walker walker) {
+ return getWalkerDescriptionText(walker.getClass());
+ }
+
/**
* Retrieves the walker class given a walker name.
* @param walkerName Name of the walker.
* @return Class representing the walker.
*/
- public Class getWalkerClassByName(String walkerName) {
- return (Class)pluginsByName.get(walkerName);
+ public Class<? extends Walker> getWalkerClassByName(String walkerName) {
+ return pluginsByName.get(walkerName);
+ }
+
+ /**
+ * Gets the data source for the provided walker.
+ * @param walkerClass The class of the walker.
+ * @return Which type of data source to traverse over...reads or reference?
+ */
+ public static DataSource getWalkerDataSource(Class<? extends Walker> walkerClass) {
+ By byDataSource = walkerClass.getAnnotation(By.class);
+ if( byDataSource == null )
+ throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
+ return byDataSource.value();
}
/**
@@ -157,21 +178,38 @@ public class WalkerManager extends PluginManager {
* @return Which type of data source to traverse over...reads or reference?
*/
public static DataSource getWalkerDataSource(Walker walker) {
- Class<? extends Walker> walkerClass = walker.getClass();
- By byDataSource = walkerClass.getAnnotation(By.class);
- if( byDataSource == null )
- throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName());
- return byDataSource.value();
+ return getWalkerDataSource(walker.getClass());
+ }
+
+ /**
+ * Get a list of RODs allowed by the walker.
+ * @param walkerClass Class of the walker to query.
+ * @return The list of allowed reference meta data.
+ */
+ public static List getAllowsMetaData(Class<? extends Walker> walkerClass) {
+ Allows allowsDataSource = getWalkerAllowed(walkerClass);
+ if (allowsDataSource == null)
+ return Collections.emptyList();
+ return Arrays.asList(allowsDataSource.referenceMetaData());
+ }
+
+ /**
+ * Get a list of RODs allowed by the walker.
+ * @param walker Walker to query.
+ * @return The list of allowed reference meta data.
+ */
+ public static List getAllowsMetaData(Walker walker) {
+ return getAllowsMetaData(walker.getClass());
}
/**
* Determine whether the given walker supports the given data source.
- * @param walker Walker to query.
+ * @param walkerClass Class of the walker to query.
* @param dataSource Source to check for .
* @return True if the walker forbids this data type. False otherwise.
*/
- public static boolean isAllowed(Walker walker, DataSource dataSource) {
- Allows allowsDataSource = getWalkerAllowed(walker);
+ public static boolean isAllowed(Class<? extends Walker> walkerClass, DataSource dataSource) {
+ Allows allowsDataSource = getWalkerAllowed(walkerClass);
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
@@ -182,13 +220,23 @@ public class WalkerManager extends PluginManager {
}
/**
- * Determine whether the given walker supports the given reference ordered data.
+ * Determine whether the given walker supports the given data source.
* @param walker Walker to query.
+ * @param dataSource Source to check for .
+ * @return True if the walker forbids this data type. False otherwise.
+ */
+ public static boolean isAllowed(Walker walker, DataSource dataSource) {
+ return isAllowed(walker.getClass(), dataSource);
+ }
+
+ /**
+ * Determine whether the given walker supports the given reference ordered data.
+ * @param walkerClass Class of the walker to query.
* @param rod Source to check.
* @return True if the walker forbids this data type. False otherwise.
*/
- public static boolean isAllowed(Walker walker, RMDTrack rod) {
- Allows allowsDataSource = getWalkerAllowed(walker);
+ public static boolean isAllowed(Class<? extends Walker> walkerClass, RMDTrack rod) {
+ Allows allowsDataSource = getWalkerAllowed(walkerClass);
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
@@ -208,6 +256,27 @@ public class WalkerManager extends PluginManager {
return false;
}
+ /**
+ * Determine whether the given walker supports the given reference ordered data.
+ * @param walker Walker to query.
+ * @param rod Source to check.
+ * @return True if the walker forbids this data type. False otherwise.
+ */
+ public static boolean isAllowed(Walker walker, RMDTrack rod) {
+ return isAllowed(walker.getClass(), rod);
+ }
+
+ /**
+ * Determine whether the given walker requires the given data source.
+ * @param walkerClass Class of the walker to query.
+ * @param dataSource Source to check for.
+ * @return True if the walker allows this data type. False otherwise.
+ */
+ public static boolean isRequired(Class<? extends Walker> walkerClass, DataSource dataSource) {
+ Requires requiresDataSource = getWalkerRequirements(walkerClass);
+ return Arrays.asList(requiresDataSource.value()).contains(dataSource);
+ }
+
/**
* Determine whether the given walker requires the given data source.
* @param walker Walker to query.
@@ -215,18 +284,26 @@ public class WalkerManager extends PluginManager {
* @return True if the walker allows this data type. False otherwise.
*/
public static boolean isRequired(Walker walker, DataSource dataSource) {
- Requires requiresDataSource = getWalkerRequirements(walker);
- return Arrays.asList(requiresDataSource.value()).contains(dataSource);
+ return isRequired(walker.getClass(), dataSource);
+ }
+
+ /**
+ * Get a list of RODs required by the walker.
+ * @param walkerClass Class of the walker to query.
+ * @return The list of required reference meta data.
+ */
+ public static List getRequiredMetaData(Class<? extends Walker> walkerClass) {
+ Requires requiresDataSource = getWalkerRequirements(walkerClass);
+ return Arrays.asList(requiresDataSource.referenceMetaData());
}
/**
* Get a list of RODs required by the walker.
* @param walker Walker to query.
- * @return True if the walker allows this data type. False otherwise.
+ * @return The list of required reference meta data.
*/
public static List getRequiredMetaData(Walker walker) {
- Requires requiresDataSource = getWalkerRequirements(walker);
- return Arrays.asList(requiresDataSource.referenceMetaData());
+ return getRequiredMetaData(walker.getClass());
}
/**
@@ -238,6 +315,19 @@ public class WalkerManager extends PluginManager {
return walkerType.isAnnotationPresent(Hidden.class);
}
+ /**
+ * Extracts filters that the walker has requested be run on the dataset.
+ * @param walkerClass Class of the walker to inspect for filtering requests.
+ * @param filterManager Manages the creation of filters.
+ * @return A non-empty list of filters to apply to the reads.
+ */
+ public static List getReadFilters(Class<? extends Walker> walkerClass, FilterManager filterManager) {
+ List filters = new ArrayList();
+ for(Class<? extends SamRecordFilter> filterType: getReadFilterTypes(walkerClass))
+ filters.add(filterManager.createFilterByType(filterType));
+ return filters;
+ }
+
/**
* Extracts filters that the walker has requested be run on the dataset.
* @param walker Walker to inspect for filtering requests.
@@ -245,10 +335,28 @@ public class WalkerManager extends PluginManager {
* @return A non-empty list of filters to apply to the reads.
*/
public static List getReadFilters(Walker walker, FilterManager filterManager) {
- List filters = new ArrayList();
- for(Class<? extends SamRecordFilter> filterType: getReadFilterTypes(walker))
- filters.add(filterManager.createFilterByType(filterType));
- return filters;
+ return getReadFilters(walker.getClass(), filterManager);
+ }
+
+ /**
+ * Gets the type of downsampling method requested by the walker. If an alternative
+ * downsampling method is specified on the command-line, the command-line version will
+ * be used instead.
+ * @param walkerClass The class of the walker to interrogate.
+ * @return The downsampling method, as specified by the walker. Null if none exists.
+ */
+ public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass) {
+ DownsamplingMethod downsamplingMethod = null;
+
+ if( walkerClass.isAnnotationPresent(Downsample.class) ) {
+ Downsample downsampleParameters = walkerClass.getAnnotation(Downsample.class);
+ DownsampleType type = downsampleParameters.by();
+ Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
+ Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
+ downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
+ }
+
+ return downsamplingMethod;
}
/**
@@ -259,17 +367,7 @@ public class WalkerManager extends PluginManager {
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Walker walker) {
- DownsamplingMethod downsamplingMethod = null;
-
- if( walker.getClass().isAnnotationPresent(Downsample.class) ) {
- Downsample downsampleParameters = walker.getClass().getAnnotation(Downsample.class);
- DownsampleType type = downsampleParameters.by();
- Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
- Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
- downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
- }
-
- return downsamplingMethod;
+ return getDownsamplingMethod(walker.getClass());
}
/**
@@ -293,26 +391,55 @@ public class WalkerManager extends PluginManager {
/**
* Utility to get the requires attribute from the walker.
* Throws an exception if requirements are missing.
- * @param walker Walker to query for required data.
+ * @param walkerClass Class of the walker to query for required data.
* @return Required data attribute.
*/
- private static Requires getWalkerRequirements(Walker walker) {
- Class extends Walker> walkerClass = walker.getClass();
+ private static Requires getWalkerRequirements(Class<? extends Walker> walkerClass) {
Requires requiresDataSource = walkerClass.getAnnotation(Requires.class);
if( requiresDataSource == null )
throw new StingException( "Unable to find data types required by walker class " + walkerClass.getName());
return requiresDataSource;
}
+ /**
+ * Utility to get the requires attribute from the walker.
+ * Throws an exception if requirements are missing.
+ * @param walker Walker to query for required data.
+ * @return Required data attribute.
+ */
+ private static Requires getWalkerRequirements(Walker walker) {
+ return getWalkerRequirements(walker.getClass());
+ }
+
+ /**
+ * Utility to get the forbidden attribute from the walker.
+ * @param walkerClass Class of the walker to query for required data.
+ * @return Required data attribute. Null if forbidden info isn't present.
+ */
+ private static Allows getWalkerAllowed(Class<? extends Walker> walkerClass) {
+ Allows allowsDataSource = walkerClass.getAnnotation(Allows.class);
+ return allowsDataSource;
+ }
+
/**
* Utility to get the forbidden attribute from the walker.
* @param walker Walker to query for required data.
* @return Required data attribute. Null if forbidden info isn't present.
*/
private static Allows getWalkerAllowed(Walker walker) {
- Class<? extends Walker> walkerClass = walker.getClass();
- Allows allowsDataSource = walkerClass.getAnnotation(Allows.class);
- return allowsDataSource;
+ return getWalkerAllowed(walker.getClass());
+ }
+
+ /**
+ * Gets the list of filtering classes specified as walker annotations.
+ * @param walkerClass Class of the walker to inspect.
+ * @return An array of types extending from SamRecordFilter. Will never be null.
+ */
+ @SuppressWarnings("unchecked")
+ public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Class<? extends Walker> walkerClass) {
+ if( !walkerClass.isAnnotationPresent(ReadFilters.class) )
+ return new Class[0];
+ return walkerClass.getAnnotation(ReadFilters.class).value();
}
/**
@@ -320,10 +447,7 @@ public class WalkerManager extends PluginManager {
* @param walker The walker to inspect.
* @return An array of types extending from SamRecordFilter. Will never be null.
*/
- private static Class<? extends SamRecordFilter>[] getReadFilterTypes(Walker walker) {
- Class extends Walker> walkerClass = walker.getClass();
- if( !walkerClass.isAnnotationPresent(ReadFilters.class) )
- return new Class[0];
- return walkerClass.getAnnotation(ReadFilters.class).value();
+ public static Class<? extends SamRecordFilter>[] getReadFilterTypes(Walker walker) {
+ return getReadFilterTypes(walker.getClass());
}
}
diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 90afae069..3be32ec49 100755
--- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -29,6 +29,8 @@ import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
@@ -64,7 +66,7 @@ public class GATKArgumentCollection {
// parameters and their defaults
@ElementList(required = false)
- @Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
+ @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List samFiles = new ArrayList();
@Element(required = false)
@@ -76,19 +78,19 @@ public class GATKArgumentCollection {
public List readFilters = new ArrayList();
@ElementList(required = false)
- @Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
+ @Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public List intervals = null;
@ElementList(required = false)
- @Argument(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
+ @Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
public List excludeIntervals = null;
@Element(required = false)
- @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
+ @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@ElementList(required = false)
- @Argument(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form ,,", required = false)
+ @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form ,,", required = false)
public ArrayList RODBindings = new ArrayList();
@Element(required = false)
@@ -100,30 +102,30 @@ public class GATKArgumentCollection {
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
- @Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
+ @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
public String DBSNPFile = null;
@Element(required = false)
- @Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
+ @Input(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false)
public String HAPMAPFile = null;
@Element(required = false)
- @Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
+ @Input(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false)
public String HAPMAPChipFile = null;
/** An output file presented to the walker. */
@Element(required = false)
- @Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
+ @Output(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String outFileName = null;
/** An error output file presented to the walker. */
@Element(required = false)
- @Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
+ @Output(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false)
public String errFileName = null;
/** A joint file for both 'normal' and error output presented to the walker. */
@Element(required = false)
- @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
+ @Output(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false)
public String outErrFileName = null;
@Element(required = false)
diff --git a/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java b/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java
index 44f9bdf76..bd899b80c 100644
--- a/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java
+++ b/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java
@@ -30,6 +30,8 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
import net.sf.picard.filter.SamRecordFilter;
+import java.util.Collection;
+
/**
* Manage filters and filter options. Any requests for basic filtering classes
* should ultimately be made through this class.
@@ -38,11 +40,6 @@ import net.sf.picard.filter.SamRecordFilter;
* @version 0.1
*/
public class FilterManager extends PluginManager {
- /**
- * our log, which we want to capture anything from this class
- */
- private static Logger logger = Logger.getLogger(FilterManager.class);
-
public FilterManager() {
super(SamRecordFilter.class,"filter","Filter");
}
@@ -50,10 +47,14 @@ public class FilterManager extends PluginManager {
/**
* Instantiate a filter of the given type. Along the way, scream bloody murder if
* the filter is not available.
- * @param filterType
- * @return
+ * @param filterType The type of the filter
+ * @return The filter
*/
public SamRecordFilter createFilterByType(Class<? extends SamRecordFilter> filterType) {
return this.createByName(getName(filterType));
}
+
+ public Collection<Class<? extends SamRecordFilter>> getValues() {
+ return this.pluginsByName.values();
+ }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java
index ccbc16d37..2ac9c0314 100644
--- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java
+++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java
@@ -158,10 +158,12 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
+ source.field.getType(),
"variants_out",
"varout",
false,
source.isMultiValued(),
+ getCollectionComponentType(source.field),
source.isHidden(),
null );
}
@@ -173,13 +175,15 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
- return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation),
+ return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
+ GenotypeWriterFactory.GENOTYPE_FORMAT.class,
"variant_output_format",
"vf",
"Format to be used to represent variants; default is VCF",
false,
false,
false,
+ null,
source.isHidden(),
null,
null,
diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
index 184da8757..7f9802220 100644
--- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
+++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
@@ -97,10 +97,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
return new ArgumentDefinition( annotation,
+ source.field.getType(),
DEFAULT_ARGUMENT_FULLNAME,
DEFAULT_ARGUMENT_SHORTNAME,
false,
source.isMultiValued(),
+ getCollectionComponentType(source.field),
source.isHidden(),
null );
}
@@ -112,13 +114,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
*/
private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) {
Annotation annotation = this.getArgumentAnnotation(source);
- return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation),
+ return new ArgumentDefinition( ArgumentIOType.getIOType(annotation),
+ int.class,
COMPRESSION_FULLNAME,
COMPRESSION_SHORTNAME,
"Compression level to use for writing BAM files",
false,
false,
false,
+ null,
source.isHidden(),
null,
null,
diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
deleted file mode 100644
index 7f7b8da13..000000000
--- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.iterators;
-
-import net.sf.picard.PicardException;
-import net.sf.picard.sam.ReservedTagConstants;
-import net.sf.picard.sam.SamFileHeaderMerger;
-import net.sf.picard.util.PeekableIterator;
-import net.sf.samtools.*;
-import net.sf.samtools.util.CloseableIterator;
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.Reads;
-import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.Utils;
-
-import java.lang.reflect.Constructor;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-
-// Should replace picard class with the same name
-class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
- private Reads sourceInfo;
- private final Comparator comparator;
- private final SAMFileReader reader;
- private final SamFileHeaderMerger mHeaderMerger;
-
- /**
- * Constructs an iterator for iteration over the supplied SAM file that will be
- * able to compare itself to other ComparableSAMRecordIterator instances using
- * the supplied comparator for ordering SAMRecords.
- *
- * @param sam the SAM file to read records from
- * @param comparator the Comparator to use to provide ordering fo SAMRecords
- */
- public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, final Comparator comparator) {
- super(sam.iterator());
- this.reader = sam;
- this.comparator = comparator;
- mHeaderMerger = samHeaderMerger;
- }
-
- public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, Iterator iterator, final Comparator comparator) {
- super(iterator); // use the provided iterator
- this.reader = sam;
- this.comparator = comparator;
- mHeaderMerger = samHeaderMerger;
- }
-
- public Reads getSourceInfo() {
- if (sourceInfo == null)
- throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework.");
- return sourceInfo;
- }
-
- /**
- * Returns the reader from which this iterator was constructed.
- *
- * @return the SAMFileReader
- */
- public SAMFileReader getReader() {
- return reader;
- }
-
- /**
- * Compares this iterator to another comparable iterator based on the next record
- * available in each iterator. If the two comparable iterators have different
- * comparator types internally an exception is thrown.
- *
- * @param that another iterator to compare to
- *
- * @return a negative, 0 or positive number as described in the Comparator interface
- */
- public int compareTo(final ComparableSamRecordIterator that) {
- if (this.comparator.getClass() != that.comparator.getClass()) {
- throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
- "have different orderings internally");
- }
-
- final SAMRecord record = this.peek();
- final SAMRecord record2 = that.peek();
- record.setHeader(mHeaderMerger.getMergedHeader());
- record2.setHeader(mHeaderMerger.getMergedHeader());
- int index, index2;
- try {
- index = mHeaderMerger.getMergedHeader().getSequenceIndex(record.getReferenceName());
- record.setReferenceIndex(index);
-
- index2 = mHeaderMerger.getMergedHeader().getSequenceIndex(record2.getReferenceName());
- record2.setReferenceIndex(index2);
- } catch (Exception e) {
- throw new StingException("MergingSamRecordIterator2: unable to correct the reference index for read " + record.getReadName() + " or record " + record2.getReadName(),e);
- }
- return comparator.compare(record, record2);
- }
-
- public Iterator iterator() {
- return this;
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java
index fb0fd3b25..19a6607fb 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java
@@ -31,26 +31,28 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
+import java.util.*;
/**
- * @author aaron
- *
- * Class RMDTrackManager
- *
- * Find the available track builders, and create the requisite tracks from the command line.
+ * Find the available track builders, and create the requisite tracks from the command line.
+ *
+ * In Tribble RMD tracks have two classes:
+ * - a Feature that is the model/view for the data
+ * - a Codec that is the controller to generate the Feature.
+ *
+ * In this class, the track types are the Codecs. The track record types are the Features.
*/
public class RMDTrackManager extends PluginManager {
// the input strings we use to create RODs from
List inputs = new ArrayList();
// create an active mapping of builder instances, and a map of the name -> class for convenience
- Map availableTracks;
- Map availableTrackClasses;
+ /** the tracks that are available to us, associated with their builder */
+ Map availableTrackBuilders;
+ /** the classes names, with their class description (think the Controller Codecs) */
+ Map availableTrackTypes;
+ /** the available track record types (think the Model/View Features) */
+ Map availableTrackRecordTypes;
/** Create a new track plugin manager. */
public RMDTrackManager() {
@@ -65,28 +67,56 @@ public class RMDTrackManager extends PluginManager {
* @return a list of RMDTracks, one for each -B option
*/
public List getReferenceMetaDataSources(List triplets) {
- if (availableTracks == null || availableTrackClasses == null) initialize(triplets);
+ initializeTrackTypes();
+ initializeTriplets(triplets);
// try and make the tracks given their requests
- return createRequestedTrackObjects(availableTracks, availableTrackClasses);
+ return createRequestedTrackObjects();
+ }
+
+
+ /**
+ * Returns a collection of track names that match the record type.
+ * @param trackRecordType the record type specified in the @RMD annotation
+ * @return a collection of available track record type names that match the record type
+ */
+ public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
+ initializeTrackTypes();
+ initializeTrackRecordTypes();
+ Set<String> names = new TreeSet<String>();
+ for (Map.Entry<String, Class> availableTrackRecordType: availableTrackRecordTypes.entrySet()) {
+ if (trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
+ names.add(availableTrackRecordType.getKey());
+ }
+ return names;
}
/**
- * initialize our lists of tracks and builders
+ * initialize our lists of triplets
* @param triplets the input to the GATK, as a list of strings passed in through the -B options
*/
- private void initialize(List triplets) {
+ private void initializeTriplets(List triplets) {
+ // NOTE: Method acts as if it were static. Once the inputs have been passed, they are locked in.
+ if (inputs.size() > 0 || triplets.size() == 0)
+ return;
+
for (String value: triplets) {
String[] split = value.split(",");
if (split.length != 3) throw new IllegalArgumentException(value + " is not a valid reference metadata track description");
inputs.add(new RMDTriplet(split[0], split[1], split[2]));
}
+ }
+
+ /**
+ * initialize our lists of tracks and builders
+ */
+ private void initializeTrackTypes() {
+ if (availableTrackBuilders != null && availableTrackTypes != null)
+ return;
// create an active mapping of builder instances, and a map of the name -> class for convenience
- availableTracks = new HashMap();
- availableTrackClasses = new HashMap();
+ availableTrackBuilders = new HashMap();
+ availableTrackTypes = new HashMap();
createBuilderObjects();
-
-
}
/**
@@ -98,8 +128,24 @@ public class RMDTrackManager extends PluginManager {
RMDTrackBuilder builder = this.createByName(builderName);
Map mapping = builder.getAvailableTrackNamesAndTypes();
for (String name : mapping.keySet()) {
- availableTracks.put(name.toUpperCase(), builder);
- availableTrackClasses.put(name.toUpperCase(), mapping.get(name));
+ availableTrackBuilders.put(name.toUpperCase(), builder);
+ availableTrackTypes.put(name.toUpperCase(), mapping.get(name));
+ }
+ }
+ }
+
+ /**
+ * initialize our list of track record types
+ */
+ private void initializeTrackRecordTypes() {
+ if (availableTrackRecordTypes != null)
+ return;
+
+ availableTrackRecordTypes = new HashMap();
+ for (RMDTrackBuilder builder : availableTrackBuilders.values()) {
+ Map mapping = builder.getAvailableTrackNamesAndRecordTypes();
+ for (String name : mapping.keySet()) {
+ availableTrackRecordTypes.put(name.toUpperCase(), mapping.get(name));
}
}
}
@@ -107,22 +153,18 @@ public class RMDTrackManager extends PluginManager {
/**
* create the requested track objects
*
- * @param availableTracks the tracks that are available to us, associated with their builder
- * @param availableTrackClasses the classes names, with their class description
- *
* @return a list of the tracks, one for each of the requested input tracks
*/
- private List createRequestedTrackObjects(Map availableTracks, Map availableTrackClasses) {
+ private List createRequestedTrackObjects() {
// create of live instances of the tracks
List tracks = new ArrayList();
// create instances of each of the requested types
for (RMDTriplet trip : inputs) {
- RMDTrackBuilder b = availableTracks.get(trip.getType().toUpperCase());
+ RMDTrackBuilder b = availableTrackBuilders.get(trip.getType().toUpperCase());
if (b == null) throw new StingException("Unable to find track for " + trip.getType());
- tracks.add(b.createInstanceOfTrack(availableTrackClasses.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile())));
+ tracks.add(b.createInstanceOfTrack(availableTrackTypes.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile())));
}
return tracks;
}
}
-
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java
index 01c971acb..17b778f45 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java
@@ -44,6 +44,9 @@ public interface RMDTrackBuilder {
/** @return a list of all available tracks types we currently have access to create */
public Map getAvailableTrackNamesAndTypes();
+ /** @return a list of all available track record types we currently have access to create */
+ public Map getAvailableTrackNamesAndRecordTypes();
+
/**
* create a RMDTrack of the specified type
*
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java
index dc5de4e20..b04b2dad1 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java
@@ -75,15 +75,19 @@ public class RODTrackBuilder implements RMDTrackBuilder {
return new RODRMDTrack(targetClass, name, inputFile, createROD(name,targetClass,inputFile));
}
- /** @return a map of all available tracks we currently have access to create */
+ /** @return a map of all available track types we currently have access to create */
+ @Override
public Map getAvailableTrackNamesAndTypes() {
- Map ret = new HashMap();
- for (String name : Types.keySet())
- ret.put(name, Types.get(name));
- return ret;
+ return new HashMap(Types);
}
-/**
+ /** @return a map of all available track record types we currently have access to create */
+ @Override
+ public Map getAvailableTrackNamesAndRecordTypes() {
+ return new HashMap(Types);
+ }
+
+ /**
* Helpful function that parses a single triplet of <name>,<type>,<file> and returns the corresponding ROD with
* <name>, of type <type> that reads its input from <file>.
*
diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java
index b5e18e069..955d5111e 100644
--- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java
@@ -35,7 +35,6 @@ import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.index.interval.IntervalIndexCreator;
import org.broad.tribble.index.linear.LinearIndexCreator;
import org.broad.tribble.source.BasicFeatureSource;
-import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@@ -80,12 +79,20 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
super(FeatureCodec.class, "Codecs", "Codec");
}
- /** @return a list of all available tracks we currently have access to create */
+ /** @return a list of all available track types we currently have access to create */
@Override
public Map getAvailableTrackNamesAndTypes() {
+ return new HashMap(this.pluginsByName);
+ }
+
+ /** @return a list of all available track record types we currently have access to create */
+ @Override
+ public Map getAvailableTrackNamesAndRecordTypes() {
Map classes = new HashMap();
- for (String c : this.pluginsByName.keySet())
- classes.put(c, this.pluginsByName.get(c));
+ for (String name: this.pluginsByName.keySet()) {
+ FeatureCodec codec = this.createByName(name);
+ classes.put(name, codec.getFeatureType());
+ }
return classes;
}
@@ -115,11 +122,12 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
/**
* create a feature reader of the specified type
* @param targetClass the target codec type
+ * @param name the target name
* @param inputFile the input file to create the track from (of the codec type)
* @return the FeatureReader instance
*/
public Pair createFeatureReader(Class targetClass, String name, File inputFile) {
- Pair pair = null;
+ Pair pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile);
else
@@ -133,6 +141,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
* exists.
*
* @param targetClass the codec class type
+ * @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
@@ -156,6 +165,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
/**
* create a linear feature reader, where we create the index ahead of time
* @param targetClass the target class
+ * @param name the name of the codec
* @param inputFile the tribble file to parse
* @return the input file as a FeatureReader
*/
@@ -264,7 +274,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
* @param indexFile the index file location
* @param lock the locking object
* @return the index object
- * @throws IOException
+ * @throws IOException when unable to create the new index
*/
private static Index createNewIndex(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, FSLockWithShared lock) throws IOException {
Index index = createIndexInMemory(inputFile, codec);
@@ -296,7 +306,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
* @param inputFile the input file
* @param codec the codec
* @return a LinearIndex, given the file location
- * @throws IOException
+ * @throws IOException when unable to create the index in memory
*/
private static Index createIndexInMemory(File inputFile, FeatureCodec codec) throws IOException {
// this can take a while, let them know what we're doing
@@ -317,7 +327,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen
* @param contigList the contig list, in coordinate order, this is allowed to be null
* @return a SAMSequenceDictionary, WITHOUT contig sizes
*/
- private static final SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet contigList) {
+ private static SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet contigList) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
if (contigList == null) return dict;
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java
index 83e363b20..acbc708bb 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java
@@ -487,7 +487,7 @@ public class ClipReadsWalker extends ReadWalker {
/** an optional argument to dump the reads out to a BAM file */
- @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
+ @Output(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
SAMFileWriter outputBamFile = null;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
String readGroup = null;
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java b/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java
index 20479a05c..306b3b1d3 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java
@@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
+import org.broad.tribble.Feature;
+
import java.lang.annotation.Documented;
import java.lang.annotation.Inherited;
import java.lang.annotation.Retention;
@@ -25,5 +27,5 @@ import java.lang.annotation.RetentionPolicy;
@Retention(RetentionPolicy.RUNTIME)
public @interface RMD {
String name();
- Class type();
+ Class type() default Feature.class;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java
index 1480a9ee9..2d72fb439 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java
@@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import org.broad.tribble.util.variantcontext.VariantContext;
+import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
@@ -85,7 +86,7 @@ public class CovariateCounterWalker extends LocusWalker getAnnotationIOClass() { return argumentDefinition.ioType.annotationClass; }
+ @Override protected String getDoc() { return escape(argumentDefinition.doc); }
+ @Override protected String getFullName() { return escape(argumentDefinition.fullName); }
+ @Override protected String getShortName() { return escape(argumentDefinition.shortName); }
+ @Override protected boolean isRequired() { return argumentDefinition.required; }
+ @Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); }
+ @Override protected String getValidation() { return escape(argumentDefinition.validation); }
+
+ protected static final String REQUIRED_TEMPLATE = " + \" %1$s \" + %2$s.format(%3$s)";
+ protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=%2$s)";
+ protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=%2$s)";
+ protected static final String FLAG_TEMPLATE = " + (if (%3$s) \" %1$s \" else \"\")";
+
+ public final String getCommandLineAddition() {
+ return String.format(getCommandLineTemplate(), getCommandLineParam(), getCommandLineFormat(), getFieldName());
+ }
+
+ protected String getCommandLineParam() {
+ return (argumentDefinition.shortName != null)
+ ? "-" + argumentDefinition.shortName
+ : "--" + argumentDefinition.fullName;
+ }
+
+ protected String getCommandLineFormat() {
+ return "\"%s\"";
+ }
+
+ @Override
+ protected String getScatterGatherAnnotation() {
+ return "";
+ }
+
+ protected String getCommandLineTemplate() {
+ return isRequired() ? REQUIRED_TEMPLATE : OPTIONAL_TEMPLATE;
+ }
+
+ public static List<? extends ArgumentField> getArgumentFields(Class<?> classType) {
+ List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
+ for (ArgumentSource argumentSource: ParsingEngine.extractArgumentSources(classType))
+ for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
+ argumentFields.addAll(getArgumentFields(argumentDefinition));
+ return argumentFields;
+ }
+
+ private static final List<String> intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals");
+
+ private static List<? extends ArgumentField> getArgumentFields(ArgumentDefinition argumentDefinition) {
+ if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
+ boolean scatter = "intervals".equals(argumentDefinition.fullName);
+ return Arrays.asList(
+ new IntervalFileArgumentField(argumentDefinition, scatter),
+ new IntervalStringArgumentField(argumentDefinition));
+
+ // ROD Bindings are set by the RodBindField
+ } else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
+ // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
+ return Collections.singletonList(new RodBindArgumentField(argumentDefinition, argumentDefinition.required));
+ //return Collections.emptyList();
+
+ } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
+ return Arrays.asList(new InputArgumentField(argumentDefinition), new IndexFilesField());
+
+ } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) {
+ return Collections.singletonList(new InputArgumentField(argumentDefinition));
+
+ } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) {
+ return Collections.singletonList(new OutputArgumentField(argumentDefinition));
+
+ } else if (argumentDefinition.isFlag) {
+ return Collections.singletonList(new FlagArgumentField(argumentDefinition));
+
+ } else if (argumentDefinition.isMultiValued) {
+ return Collections.singletonList(new MultiValuedArgumentField(argumentDefinition));
+
+ } else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) {
+ boolean useFormat = useFormatter(argumentDefinition.argumentType);
+ List<ArgumentField> fields = new ArrayList<ArgumentField>();
+ ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat);
+ fields.add(field);
+ if (useFormat) fields.add(new FormatterArgumentField(field));
+ return fields;
+
+ } else {
+ boolean useFormat = useFormatter(argumentDefinition.argumentType);
+ List<ArgumentField> fields = new ArrayList<ArgumentField>();
+ ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat);
+ fields.add(field);
+ if (useFormat) fields.add(new FormatterArgumentField(field));
+ return fields;
+
+ }
+ }
+
+ // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT)
+ // Change intervals to an input file, and optionally scatter it.
+ private static class IntervalFileArgumentField extends InputArgumentField {
+ private final boolean scatter;
+ public IntervalFileArgumentField(ArgumentDefinition argumentDefinition, boolean scatter) {
+ super(argumentDefinition);
+ this.scatter = scatter;
+ }
+
+ @Override protected boolean isMultiValued() { return !this.scatter && super.isMultiValued(); }
+ @Override public boolean isScatter() { return this.scatter; }
+ @Override protected String getScatterGatherAnnotation() {
+ return scatter ? String.format("@Scatter(classOf[IntervalScatterFunction])%n") : super.getScatterGatherAnnotation();
+ }
+
+ @Override
+ protected String getExclusiveOf() {
+ StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf());
+ if (exclusiveOf.length() > 0)
+ exclusiveOf.append(",");
+ exclusiveOf.append(escape(argumentDefinition.fullName)).append("String");
+ return exclusiveOf.toString();
+ }
+ }
+
+ // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT)
+ // Change intervals to a string but as an argument.
+ private static class IntervalStringArgumentField extends ArgumentDefinitionField {
+ public IntervalStringArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Argument.class; }
+ @Override protected Class<?> getInnerType() { return String.class; }
+ @Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; }
+ @Override protected String getFullName() { return super.getFullName() + "String"; }
+ @Override protected String getFieldType() { return "List[String]"; }
+ @Override protected String getDefaultValue() { return "Nil"; }
+ @Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
+
+ @Override
+ protected String getExclusiveOf() {
+ StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf());
+ if (exclusiveOf.length() > 0)
+ exclusiveOf.append(",");
+ exclusiveOf.append(escape(argumentDefinition.fullName));
+ return exclusiveOf.toString();
+ }
+ }
+
+ // if (argumentDefinition.ioType == ArgumentIOType.INPUT)
+ // Map all inputs to files. Handles multi valued files.
+ private static class InputArgumentField extends ArgumentDefinitionField {
+ public InputArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition);
+ }
+
+ @Override protected Class<?> getInnerType() { return File.class; }
+ @Override protected String getFieldType() { return String.format(isMultiValued() ? "List[%s]" : "%s", getRawFieldType()); }
+ @Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
+ @Override protected String getCommandLineTemplate() {
+ return isMultiValued() ? REPEAT_TEMPLATE : super.getCommandLineTemplate();
+ }
+
+ protected String getRawFieldType() { return "File"; }
+ protected boolean isMultiValued() { return argumentDefinition.isMultiValued; }
+ }
+
+ // if (argumentDefinition.ioType == ArgumentIOType.OUTPUT)
+ // Map all outputs to files.
+ private static class OutputArgumentField extends ArgumentDefinitionField {
+ public OutputArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition);
+ }
+
+ @Override protected Class<?> getInnerType() { return File.class; }
+ @Override protected String getFieldType() { return "File"; }
+ @Override protected String getDefaultValue() { return "_"; }
+
+ @Override public boolean isGather() { return true; }
+ @Override protected String getScatterGatherAnnotation() {
+ return String.format(SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)
+ ? "@Gather(classOf[BamGatherFunction])%n"
+ : "@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])%n");
+ }
+ }
+
+ // if (argumentDefinition.isFlag)
+ // Booleans should be set on the commandline only if they are true.
+ private static class FlagArgumentField extends ArgumentDefinitionField {
+ public FlagArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition);
+ }
+
+ @Override protected Class<?> getInnerType() { return boolean.class; }
+ @Override protected String getFieldType() { return "Boolean"; }
+ @Override protected String getDefaultValue() { return "_"; }
+ @Override protected String getCommandLineTemplate() { return FLAG_TEMPLATE; }
+ }
+
+ // if (argumentDefinition.isMultiValued)
+ // Multi value arguments are mapped to List[] and use repeat.
+ private static class MultiValuedArgumentField extends ArgumentDefinitionField {
+ public MultiValuedArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition);
+ }
+
+ @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.componentType); }
+ @Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); }
+ @Override protected String getDefaultValue() { return "Nil"; }
+ @Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
+ }
+
+ // if (!argumentDefinition.required && useOption(argumentDefinition.argumentType))
+ // Any optional arguments that are primitives / enums are wrapped in options.
+ private static class OptionedArgumentField extends ArgumentDefinitionField {
+ private final boolean useFormatter;
+
+ public OptionedArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) {
+ super(argumentDefinition);
+ this.useFormatter = useFormatter;
+ }
+
+ @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.argumentType); }
+ @Override protected String getFieldType() { return String.format("Option[%s]", getType(getInnerType())); }
+ @Override protected String getDefaultValue() { return "None"; }
+ @Override protected String getCommandLineTemplate() { return OPTIONAL_TEMPLATE; }
+ @Override protected String getCommandLineFormat() {
+ return this.useFormatter ? getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat();
+ }
+ }
+
+ // Any other @Arguments
+ private static class DefaultArgumentField extends ArgumentDefinitionField {
+ private final boolean useFormatter;
+
+ public DefaultArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) {
+ super(argumentDefinition);
+ this.useFormatter = useFormatter;
+ }
+
+ @Override protected Class<?> getInnerType() { return mapType(argumentDefinition.argumentType); }
+ @Override protected String getFieldType() { return getType(getInnerType()); }
+ @Override protected String getDefaultValue() { return "_"; }
+ @Override protected String getCommandLineFormat() {
+ return this.useFormatter ? getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat();
+ }
+ }
+
+ /**
+ * The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file.
+ */
+ public static class RodBindArgumentField extends InputArgumentField {
+ private boolean isRequired;
+ public RodBindArgumentField(ArgumentDefinition argumentDefinition, boolean isRequired) {
+ super(argumentDefinition);
+ this.isRequired = isRequired;
+ }
+
+ @Override protected boolean isRequired() { return this.isRequired; }
+ @Override protected String getRawFieldType() { return "RodBind"; }
+ }
+
+ /**
+ * Adds optional inputs for the indexes of any bams or sams added to this function.
+ */
+ private static class IndexFilesField extends ArgumentField {
+ @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Input.class; }
+ @Override public String getCommandLineAddition() { return ""; }
+ @Override protected String getDoc() { return "Dependencies on any index files for any bams or sams added to input_files"; }
+ @Override protected String getFullName() { return "index_files"; }
+ @Override protected boolean isRequired() { return false; }
+ @Override protected String getFieldType() { return "List[File]"; }
+ @Override protected String getDefaultValue() { return "Nil"; }
+ @Override protected Class<?> getInnerType() { return File.class; }
+ @Override protected String getRawFieldName() { return "index_files"; }
+ @Override protected String getFreezeFields() {
+ return String.format(
+ "index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" +
+ "index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n");
+ }
+ }
+
+ private static class FormatterArgumentField extends ArgumentField {
+ private final ArgumentField argumentField;
+ public FormatterArgumentField(ArgumentField argumentField) {
+ this.argumentField = argumentField;
+ }
+ @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Argument.class; }
+ @Override public String getCommandLineAddition() { return ""; }
+ @Override protected String getDoc() { return "Format string for " + this.argumentField.getFullName(); }
+ @Override protected String getFullName() { return this.argumentField.getFullName() + "Format"; }
+ @Override protected boolean isRequired() { return false; }
+ @Override protected String getFieldType() { return "String"; }
+ @Override protected String getDefaultValue() { return "\"%s\""; }
+ @Override protected Class<?> getInnerType() { return String.class; }
+ @Override protected String getRawFieldName() { return this.argumentField.getRawFieldName() + "Format"; }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
new file mode 100644
index 000000000..ef7f6f729
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk;
+
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang.StringUtils;
+import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.util.*;
+
+public abstract class ArgumentField {
+
+ public Collection<String> getImportStatements() {
+ List<String> imports = new ArrayList<String>();
+ for (Class<?> importClass: getImportClasses()) {
+ if (!isBuiltIn(importClass))
+ imports.add("import " + importClass.getName().replace("$", "."));
+ }
+ return imports;
+ }
+
+ /**
+ * Returns true if a class is built in and doesn't need to be imported.
+ * @param argType The class to check.
+ * @return true if the class is built in and doesn't need to be imported
+ */
+ private static boolean isBuiltIn(Class<?> argType) {
+ return argType.isPrimitive() || argType == String.class || Number.class.isAssignableFrom(argType);
+ }
+
+ /** @return Scala code defining the argument and it's annotation. */
+ public final String getArgumentAddition() {
+ return String.format("%n" +
+ "/** %s */%n" +
+ "@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" +
+ "%svar %s: %s = %s%n",
+ getDoc(),
+ getAnnotationIOClass().getSimpleName(),
+ getFullName(),
+ getShortName(),
+ getDoc(),
+ isRequired(),
+ getExclusiveOf(),
+ getValidation(),
+ getScatterGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue());
+ }
+
+ /** @return Scala code to append to the command line. */
+ public abstract String getCommandLineAddition();
+
+ // Argument Annotation
+
+ /** @return Documentation for the annotation. */
+ protected abstract String getDoc();
+
+ /** @return Annotation class of the annotation. */
+ protected abstract Class<? extends Annotation> getAnnotationIOClass();
+
+ /** @return Full name for the annotation. */
+ protected abstract String getFullName();
+
+ /** @return Short name for the annotation or "". */
+ protected String getShortName() { return ""; }
+
+ /** @return true if the argument is required. */
+ protected abstract boolean isRequired();
+
+ /** @return A comma separated list of arguments that may be substituted for this field. */
+ protected String getExclusiveOf() { return ""; }
+
+ /** @return A validation string for the argument. */
+ protected String getValidation() { return ""; }
+
+ /** @return A scatter or gather annotation with a line feed, or "". */
+ protected String getScatterGatherAnnotation() { return ""; }
+
+ // Scala
+
+ /** @return The scala field type. */
+ protected abstract String getFieldType();
+
+ /** @return The scala default value. */
+ protected abstract String getDefaultValue();
+
+ /** @return The class of the field, or the component type if the scala field is a collection. */
+ protected abstract Class<?> getInnerType();
+
+ /** @return A custom command for overriding freeze. */
+ protected String getFreezeFields() { return ""; }
+
+ @SuppressWarnings("unchecked")
+ protected Collection<Class<?>> getImportClasses() {
+ return Arrays.asList(this.getInnerType(), getAnnotationIOClass());
+ }
+
+ /** @return True if this field uses @Scatter. */
+ public boolean isScatter() { return false; }
+
+ /** @return True if this field uses @Gather. */
+ public boolean isGather() { return false; }
+
+ /** @return The raw field name, which will be checked against scala build in types. */
+ protected abstract String getRawFieldName();
+ /** @return The field name checked against reserved words. */
+ protected final String getFieldName() {
+ return getFieldName(this.getRawFieldName());
+ }
+
+ /**
+ * @param rawFieldName The raw field name
+ * @return The field name checked against reserved words.
+ */
+ protected static String getFieldName(String rawFieldName) {
+ String fieldName = rawFieldName;
+ if (!StringUtils.isAlpha(fieldName.substring(0,1)))
+ fieldName = "_" + fieldName;
+ if (isReserved(fieldName) || fieldName.contains("-"))
+ fieldName = "`" + fieldName + "`";
+ return fieldName;
+ }
+
+ /** via http://www.scala-lang.org/sites/default/files/linuxsoft_archives/docu/files/ScalaReference.pdf */
+ private static final List<String> reservedWords = Arrays.asList(
+ "abstract", "case", "catch", "class", "def",
+ "do", "else", "extends", "false", "final",
+ "finally", "for", "forSome", "if", "implicit",
+ "import", "lazy", "match", "new", "null",
+ "object", "override", "package", "private", "protected",
+ "return", "sealed", "super", "this", "throw",
+ "trait", "try", "true", "type", "val",
+ "var", "while", "with", "yield");
+
+ protected static boolean isReserved(String word) {
+ return reservedWords.contains(word);
+ }
+
+ /**
+ * On primitive types returns the capitalized scala type.
+ * @param argType The class to check for options.
+ * @return the simple name of the class.
+ */
+ protected static String getType(Class<?> argType) {
+ String type = argType.getSimpleName();
+
+ if (argType.isPrimitive())
+ type = StringUtils.capitalize(type);
+
+ if ("Integer".equals(type))
+ type = "Int";
+
+ return type;
+ }
+
+ protected static String escape(String string) {
+ return (string == null) ? "" : StringEscapeUtils.escapeJava(string);
+ }
+
+ /**
+ * @param argType The class to check for options.
+ * @return true if option should be used.
+ */
+ protected static boolean useOption(Class<?> argType) {
+ return (argType.isPrimitive()) || (Number.class.isAssignableFrom(argType)) || (argType.isEnum());
+ }
+
+ /**
+ * @param argType The class to check for options.
+ * @return true if option should be used.
+ */
+ protected static boolean useFormatter(Class<?> argType) {
+ return (argType.equals(Double.class) || argType.equals(Double.TYPE) ||
+ argType.equals(Float.class) || argType.equals(Float.TYPE));
+ }
+
+ // TODO: Use an annotation, type descriptor, anything but hardcoding these lists!
+
+ protected static Class<?> mapType(Class<?> clazz) {
+ if (InputStream.class.isAssignableFrom(clazz)) return File.class;
+ if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class;
+ if (OutputStream.class.isAssignableFrom(clazz)) return File.class;
+ if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class;
+ if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class;
+ if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class;
+ return clazz;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java
new file mode 100644
index 000000000..cefca44da
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk;
+
+import org.broadinstitute.sting.commandline.CommandLineProgram;
+import org.broadinstitute.sting.utils.classloader.PluginManager;
+
+import java.util.Collection;
+
+/**
+ * Finds all command line programs.
+ */
+public class CommandLineProgramManager extends PluginManager<CommandLineProgram> {
+ public CommandLineProgramManager() {
+ super(CommandLineProgram.class, "CommandLineProgram", "CLP");
+ }
+
+ public Collection<Class<? extends CommandLineProgram>> getValues() {
+ return this.pluginsByName.values();
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
new file mode 100644
index 000000000..207da8a1f
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk;
+
+import net.sf.picard.filter.SamRecordFilter;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.CommandLineGATK;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.filters.FilterManager;
+import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor;
+import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
+import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
+import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.Map.Entry;
+
+/**
+ * Generates Queue modules that can be used to run GATK walkers.
+ *
+ * ArgumentCollections are flattened into a single module.
+ */
+public class GATKExtensionsGenerator extends CommandLineProgram {
+ private static final Logger logger = Logger.getRootLogger();
+ public static final String GATK_EXTENSIONS_PACKAGE_NAME = "org.broadinstitute.sting.queue.extensions.gatk";
+ private static final String COMMANDLINE_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;
+ private static final String FILTER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;
+ private static final String WALKER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME;
+
+ @Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true)
+ public File outputDirectory;
+
+ CommandLineProgramManager clpManager = new CommandLineProgramManager();
+ GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
+ WalkerManager walkerManager = new WalkerManager();
+ FilterManager filterManager = new FilterManager();
+ RMDTrackManager rmdTrackManager = new RMDTrackManager();
+
+ /**
+ * Required main method implementation.
+ * @param argv Command-line arguments.
+ */
+ public static void main(String[] argv) {
+ try {
+ start(new GATKExtensionsGenerator(), argv);
+ System.exit(CommandLineProgram.result);
+ } catch (Exception e) {
+ exitSystemWithError(e);
+ }
+ }
+
+ @Override
+ protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
+ List<ArgumentTypeDescriptor> typeDescriptors = new ArrayList<ArgumentTypeDescriptor>();
+ typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine));
+ typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine));
+ typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine));
+ typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine));
+ return typeDescriptors;
+ }
+
+ @Override
+ protected int execute() {
+ try {
+ if (!outputDirectory.isDirectory() && !outputDirectory.mkdirs())
+ throw new StingException("Unable to create output directory: " + outputDirectory);
+
+ for (Class<? extends CommandLineProgram> clp: clpManager.getValues()) {
+
+ if (!isGatkProgram(clp))
+ continue;
+
+ String clpClassName = clpManager.getName(clp);
+
+ writeClass("org.broadinstitute.sting.queue.function.JarCommandLineFunction", COMMANDLINE_PACKAGE_NAME, clpClassName,
+ "", ArgumentDefinitionField.getArgumentFields(clp));
+
+ if (clp == CommandLineGATK.class) {
+ for (Entry<String, Collection<Class<? extends Walker>>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) {
+ for(Class<? extends Walker> walkerType: walkersByPackage.getValue()) {
+ String walkerName = walkerManager.getName(walkerType);
+ List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
+
+ argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(walkerType));
+ argumentFields.addAll(RodBindField.getRodArguments(walkerType, rmdTrackManager));
+ argumentFields.addAll(ReadFilterField.getFilterArguments(walkerType));
+
+ writeClass(COMMANDLINE_PACKAGE_NAME + "." + clpClassName, WALKER_PACKAGE_NAME,
+ walkerName, String.format("analysis_type = \"%s\"%n%n", walkerName), argumentFields);
+ }
+ }
+ }
+ }
+
+ for (Class<? extends SamRecordFilter> filter: filterManager.getValues()) {
+ String filterName = filterManager.getName(filter);
+ writeFilter(FILTER_PACKAGE_NAME, filterName, ArgumentDefinitionField.getArgumentFields(filter));
+ }
+
+ return 0;
+ } catch (IOException exception) {
+ logger.error("Error generating queue output.", exception);
+ return 1;
+ }
+ }
+
+ private static final List<String> gatkPackages = Arrays.asList(
+ "org.broadinstitute.sting.gatk",
+ "org.broadinstitute.sting.analyzecovariates");
+ private boolean isGatkProgram(Class> clazz) {
+ if (clazz.getPackage() == null)
+ return false;
+ String classPackage = clazz.getPackage().getName();
+ for (String gatkPackage : gatkPackages)
+ if (classPackage.startsWith(gatkPackage))
+ return true;
+ return false;
+ }
+
+ private void writeClass(String baseClass, String packageName, String className, String constructor,
+ List<? extends ArgumentField> argumentFields) throws IOException {
+ String content = getContent(CLASS_TEMPLATE, baseClass, packageName, className, constructor, "", argumentFields);
+ writeFile(packageName + "." + className, content);
+ }
+
+ private void writeFilter(String packageName, String className, List<? extends ArgumentField> argumentFields) throws IOException {
+ String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
+ packageName, className, "", String.format(" + \" -read_filter %s\"", className), argumentFields);
+ writeFile(packageName + "." + className, content);
+ }
+
+ private void writeFile(String fullClassName, String content) throws IOException {
+ File outputFile = new File(outputDirectory, fullClassName.replace(".", "/") + ".scala");
+ if (outputFile.exists()) {
+ String existingContent = FileUtils.readFileToString(outputFile);
+ if (StringUtils.equals(content, existingContent))
+ return;
+ }
+ FileUtils.writeStringToFile(outputFile, content);
+ }
+
+ private static String getContent(String scalaTemplate, String baseClass, String packageName, String className,
+ String constructor, String commandLinePrefix, List<? extends ArgumentField> argumentFields) {
+ StringBuilder arguments = new StringBuilder();
+ StringBuilder commandLine = new StringBuilder(commandLinePrefix);
+
+ Set<String> importSet = new HashSet<String>();
+ boolean isScatter = false;
+ boolean isGather = false;
+ List<String> freezeFields = new ArrayList<String>();
+ for(ArgumentField argumentField: argumentFields) {
+ arguments.append(argumentField.getArgumentAddition());
+ commandLine.append(argumentField.getCommandLineAddition());
+ importSet.addAll(argumentField.getImportStatements());
+ freezeFields.add(argumentField.getFreezeFields());
+
+ isScatter |= argumentField.isScatter();
+ isGather |= argumentField.isGather();
+ }
+
+ if (isScatter) {
+ importSet.add("import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction");
+ importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Scatter");
+ baseClass += " with ScatterGatherableFunction";
+ }
+ if (isGather)
+ importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Gather");
+
+ // Sort the imports so that the are always in the same order.
+ List<String> sortedImports = new ArrayList<String>(importSet);
+ Collections.sort(sortedImports);
+
+ StringBuffer freezeFieldOverride = new StringBuffer();
+ for (String freezeField: freezeFields)
+ freezeFieldOverride.append(freezeField);
+ if (freezeFieldOverride.length() > 0) {
+ freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n"));
+ freezeFieldOverride.append(String.format("}%n%n"));
+ }
+
+ // see CLASS_TEMPLATE and TRAIT_TEMPLATE below
+ return String.format(scalaTemplate, packageName, StringUtils.join(sortedImports, NEWLINE),
+ className, baseClass, constructor, arguments, freezeFieldOverride, commandLine);
+ }
+
+ private static final String NEWLINE = String.format("%n");
+
+ private static final String CLASS_TEMPLATE = "package %s%n"+
+ "%s%n" +
+ "class %s extends %s {%n" +
+ "%s%s%n" +
+ "%soverride def commandLine = super.commandLine%s%n" +
+ "}%n";
+
+ private static final String TRAIT_TEMPLATE = "package %s%n"+
+ "%s%n" +
+ "trait %s extends %s {%n" +
+ "%s%s%n" +
+ "%sabstract override def commandLine = super.commandLine%s%n" +
+ "}%n";
+}
diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java
new file mode 100644
index 000000000..23eacceae
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk;
+
+import net.sf.picard.filter.SamRecordFilter;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class ReadFilterField {
+ /**
+ * Adds an argument for each read filters listed on the walker.
+ * @param walkerClass the class of the walker
+ * @return the list of argument fields
+ */
+ public static List<ArgumentField> getFilterArguments(Class<? extends Walker> walkerClass) {
+ List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
+ for(Class<? extends SamRecordFilter> filter: WalkerManager.getReadFilterTypes(walkerClass))
+ argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(filter));
+ return argumentFields;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java
new file mode 100644
index 000000000..7ae929b93
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2010, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk;
+
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager;
+import org.broadinstitute.sting.gatk.walkers.RMD;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+
+import java.io.File;
+import java.lang.annotation.Annotation;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Allows user to specify the rod file but locks in the track name and the track type.
+ */
+public class RodBindField extends ArgumentField {
+ public static final String ROD_BIND_FIELD = "rodBind";
+
+ private final String trackName;
+ private final String typeName;
+ private final List<RodBindField> relatedFields;
+ private final boolean isRequired;
+
+ public RodBindField(String trackName, String typeName, List<RodBindField> relatedFields, boolean isRequired) {
+ this.trackName = trackName;
+ this.typeName = typeName;
+ this.relatedFields = relatedFields;
+ this.isRequired = isRequired;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override protected Class<? extends Annotation> getAnnotationIOClass() { return Input.class; }
+ @Override protected Class<?> getInnerType() { return File.class; }
+ @Override protected String getFullName() { return escape(getRawFieldName()); }
+ @Override protected String getFieldType() { return "File"; }
+ @Override protected String getDefaultValue() { return "_"; }
+ @Override protected String getRawFieldName() { return this.trackName + this.typeName; }
+ @Override protected String getDoc() { return escape(this.typeName + " " + this.trackName); }
+ @Override protected boolean isRequired() { return this.isRequired; }
+
+ @Override public String getCommandLineAddition() {
+ return String.format(this.useOption()
+ ? " + optional(\" -B %s,%s,\", %s)"
+ : " + \" -B %s,%s,\" + %s",
+ this.trackName, this.typeName, getFieldName());
+ }
+
+ private boolean useOption() {
+ return !this.isRequired || (relatedFields.size() > 1);
+ }
+
+ @Override protected String getExclusiveOf() {
+ StringBuilder exclusiveOf = new StringBuilder();
+ // TODO: Stop allowing the generic "rodBind" triplets to satisfy the requirement after @Requires are fixed.
+ if (this.isRequired)
+ exclusiveOf.append(ROD_BIND_FIELD);
+ for (RodBindField relatedField: relatedFields)
+ if (relatedField != this) {
+ if (exclusiveOf.length() > 0)
+ exclusiveOf.append(",");
+ exclusiveOf.append(relatedField.getFieldName());
+ }
+ return exclusiveOf.toString();
+ }
+
+ public static List<ArgumentField> getRodArguments(Class<? extends Walker> walkerClass, RMDTrackManager rmdTrackManager) {
+ List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
+
+ List<RMD> requires = WalkerManager.getRequiredMetaData(walkerClass);
+ List<RMD> allows = WalkerManager.getAllowsMetaData(walkerClass);
+
+ for (RMD required: requires) {
+ List<RodBindField> fields = new ArrayList<RodBindField>();
+ String trackName = required.name();
+ if ("*".equals(trackName)) {
+ // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
+ //fields.add(new RodBindArgumentField(argumentDefinition, true));
+ } else {
+ for (String typeName: rmdTrackManager.getTrackRecordTypeNames(required.type()))
+ fields.add(new RodBindField(trackName, typeName, fields, true));
+ }
+ argumentFields.addAll(fields);
+ }
+
+ for (RMD allowed: allows) {
+ List<RodBindField> fields = new ArrayList<RodBindField>();
+ String trackName = allowed.name();
+ if ("*".equals(trackName)) {
+ // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers
+ //fields.add(new RodBindArgumentField(argumentDefinition, false));
+ } else {
+ for (String typeName: rmdTrackManager.getTrackRecordTypeNames(allowed.type()))
+ fields.add(new RodBindField(trackName, typeName, fields, true));
+ }
+ argumentFields.addAll(fields);
+ }
+
+ return argumentFields;
+ }
+}
diff --git a/scala/qscript/UnifiedGenotyperExample.scala b/scala/qscript/UnifiedGenotyperExample.scala
new file mode 100644
index 000000000..714d4a4fd
--- /dev/null
+++ b/scala/qscript/UnifiedGenotyperExample.scala
@@ -0,0 +1,63 @@
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.QScript
+
+class UnifiedGenotyperExample extends QScript {
+ qscript =>
+
+ @Input(doc="gatk jar file")
+ var gatkJar: File = _
+
+ @Input(doc="bam files", shortName="I")
+ var bamFiles: List[File] = Nil
+
+ @Input(doc="interval list", shortName="L")
+ var intervals: File = _
+
+ @Input(doc="referenceFile", shortName="R")
+ var referenceFile: File = _
+
+ @Argument(doc="filter names", shortName="filter")
+ var filterNames: List[String] = Nil
+
+ @Argument(doc="filter expressions", shortName="filterExpression")
+ var filterExpressions: List[String] = Nil
+
+ @Argument(doc="job queue", shortName="queue", required=false)
+ var jobQueue = "broad"
+
+ trait UnifiedGenotyperArguments extends CommandLineGATK {
+ this.jobQueue = qscript.jobQueue
+ this.jarFile = qscript.gatkJar
+ this.intervals = qscript.intervals
+ this.reference_sequence = qscript.referenceFile
+ }
+
+ def script = {
+ for (bam <- bamFiles) {
+ val ug = new UnifiedGenotyper with UnifiedGenotyperArguments
+ val vf = new VariantFiltration with UnifiedGenotyperArguments
+ val ve = new VariantEval with UnifiedGenotyperArguments
+
+ val pr = new PrintReads with UnifiedGenotyperArguments
+ pr.input_file :+= bam
+ pr.outputBamFile = swapExt(bam, "bam", "new.bam")
+ pr.scatterCount = 2
+ pr.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = new File("/path/to/jar") }
+ add(pr)
+
+ // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh.
+ ug.scatterCount = 3
+ ug.input_file :+= bam
+ ug.out = swapExt(bam, "bam", "unfiltered.vcf")
+
+ vf.rodBind :+= RodBind("vcf", "VCF", ug.out)
+ vf.out = swapExt(bam, "bam", "filtered.vcf")
+
+ ve.rodBind :+= RodBind("vcf", "VCF", vf.out)
+ ve.out = swapExt(bam, "bam", "eval")
+
+ //add(ug, vf, ve)
+ }
+
+ }
+}
diff --git a/scala/qscript/depristo/1kg_table1.scala b/scala/qscript/depristo/1kg_table1.scala
index 89c989678..9434137a1 100755
--- a/scala/qscript/depristo/1kg_table1.scala
+++ b/scala/qscript/depristo/1kg_table1.scala
@@ -1,8 +1,16 @@
-import org.broadinstitute.sting.queue.QScript._
-// Other imports can be added here
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.{GenotypeMergeType, VariantMergeType}
+import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory.VE2TemplateType
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.QScript
-val UNIVERSAL_GATK_ARGS = " -l INFO " // -L 1
-val unusedArgs = setArgs(args)
+class Onekg_table1 extends QScript {
+ @Argument(doc="stage")
+ var stage: String = _
+
+ @Argument(doc="gatkJarFile")
+ var gatkJarFile: File = _
+
+trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile } // -L 1
class Target(project: String, snpVCF: String, indelVCF: String, calledGenome: Double, targetGenome: Double, pop: String, pilot : String, bam: String = null) {
def reportFile: String = List(pop, pilot, "report").mkString(".")
@@ -40,9 +48,9 @@ for ( (pop: String, called) <- p2Targets )
targets ::= new Target("SRP000032", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2")
// pilot 3
-for (POP <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
- val indels = if ( POP != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+POP+".exon.2010_06.genotypes.vcf.gz" else null
- targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + POP + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, POP, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(POP))
+for (pop <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) {
+ val indels = if ( pop != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null
+ targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop))
}
// merged files
@@ -57,7 +65,7 @@ val INTERVALS = Map(
"pilot3" -> "/humgen/gsa-hpprojects/1kg/1kg_pilot3/documents/CenterSpecificTargetLists/results/p3overlap.targets.b36.interval_list"
)
-def setupStage(stage: String) = stage match {
+def script = stage match {
case "ALL" =>
// initial pilot1 merge -- autosomes + x
for ( (pop: String,called) <- p1Targets ) {
@@ -106,36 +114,36 @@ def setupStage(stage: String) = stage match {
case _ => throw new Exception("Unknown stage" + stage)
}
-setupStage(unusedArgs(0))
-
-// Populate parameters passed in via -P
-setParams
-
-// Run the pipeline
-run
-
// Using scala anonymous classes
-class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends GatkFunction {
- @Input(doc="foo") var vcfFile: File = new File(vcfIn)
- @Output(doc="foo") var evalFile: File = new File(evalOut)
+class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends org.broadinstitute.sting.queue.extensions.gatk.VariantEval with UNIVERSAL_GATK_ARGS {
+ val vcfFile = new File(vcfIn)
+ this.rodBind :+= RodBind("eval", vcfType, vcfFile)
+ this.out = new File(evalOut)
+ this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod")
+ this.reportType = Some(VE2TemplateType.Grep)
+ this.evalModule :+= "CompOverlap"
+
override def dotString = "VariantEval: " + vcfFile.getName
- def commandLine = gatkCommandLine("VariantEval") + UNIVERSAL_GATK_ARGS + "-D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -reportType Grep -B eval,%s,%s -o %s -E CompOverlap".format(vcfType, vcfFile, evalFile)
}
class StatPop(target: Target) extends CommandLineFunction {
@Input(doc="foo") var snpVCF = new File(target.getSNPVCF)
@Input(doc="foo") var snpEval = new File(target.getSNPEval)
- @Input(doc="foo") var indelVCF = if (target.hasIndelVCF) new File(target.getIndelVCF) else {}
+ @Input(doc="foo", required=false) var indelVCF: File = if (target.hasIndelVCF) new File(target.getIndelVCF) else { null }
@Output(doc="foo") var reportFile: File = new File(target.reportFile)
override def dotString = "1kgStats: " + reportFile
def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/1kgStatsForCalls.py -v -a pilot_data.alignment.index -s pilot_data.sequence.index -r /broad/1KG/DCC/ftp/ -o " + target.reportFile + " " + target.extraArgs + (if (target.hasDOC) " -c " + target.getDOCSummaryFile else "") + " --snpsEval " + target.getSNPEval + (if (target.hasIndelVCF) " --indels " + target.getIndelVCF else "")
}
-class Combine(vcfsInArg: List[String], vcfOutPath: String) extends GatkFunction {
- @Input(doc="foo") var vcfs = vcfsInArg.map((x: String) => new File(x))
- @Output(doc="foo") var vcfFile: File = new File(vcfOutPath)
+class Combine(vcfsInArg: List[String], vcfOutPath: String) extends org.broadinstitute.sting.queue.extensions.gatk.CombineVariants with UNIVERSAL_GATK_ARGS {
+ val vcfs = vcfsInArg.map((x: String) => new File(x))
+ val vcfFile = new File(vcfOutPath)
+ this.variantmergeoption = Some(VariantMergeType.UNION)
+ this.genotypemergeoption = Some(GenotypeMergeType.PRIORITIZE)
+ this.out = vcfFile
+ this.rodBind ++= vcfs.map( input => RodBind(input.getName,"VCF",input) )
+ this.rod_priority_list = vcfs.map( _.getName ).mkString(",")
override def dotString = "CombineVariants: " + vcfs.map(_.getName).mkString(",") + " => " + vcfFile.getName
- def commandLine = gatkCommandLine("CombineVariants") + UNIVERSAL_GATK_ARGS + "-variantMergeOptions UNION -genotypeMergeOptions PRIORITIZE -o %s %s -priority %s".format(vcfFile, vcfs.map( input => " -B %s,VCF,%s".format(input.getName,input)).mkString(""), vcfs.map( _.getName ).mkString(","))
}
class MaskStats(pop: String) extends CommandLineFunction {
@@ -143,9 +151,19 @@ class MaskStats(pop: String) extends CommandLineFunction {
def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/maskStats.py masks/" + pop + ".mask.fa.gz -x MT -x Y -o " + outFile
}
-class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends GatkFunction {
- @Input(doc="foo") var bamFile: File = new File(bam)
- @Output(doc="foo") var docFile: File = new File(docOutPath)
+class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends org.broadinstitute.sting.queue.extensions.gatk.DepthOfCoverage with UNIVERSAL_GATK_ARGS {
+ val bamFile = new File(bam)
+ this.omitIntervalStatistics = true
+ this.omitDepthOutputAtEachBase = true
+ this.minBaseQuality = Some(0)
+ this.minMappingQuality = Some(0)
+ this.out = new File(docOutPath)
+ this.input_file :+= bamFile
+ if (interval != null) {
+ this.intervalsString :+= interval
+ this.excludeIntervalsString ++= List("MT", "Y")
+ }
+
override def dotString = "DOC: " + bamFile.getName
- def commandLine = gatkCommandLine("DepthOfCoverage") + UNIVERSAL_GATK_ARGS + "-omitIntervals -omitBaseOutput -mbq 0 -mmq 0 -o %s -I %s".format(docFile, bamFile) + (if (interval != null) " -XL MT -XL Y -L " + interval else "")
+}
}
diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q
index cf09621bb..0e390911c 100755
--- a/scala/qscript/fullCallingPipeline.q
+++ b/scala/qscript/fullCallingPipeline.q
@@ -1,240 +1,61 @@
-import org.broadinstitute.sting.queue.function.scattergather.{ContigScatterFunction, FixMatesGatherFunction}
+import org.broadinstitute.sting.gatk.DownsampleType
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model
+import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
-import org.broadinstitute.sting.queue.QScript._
-// Other imports can be added here
-val unparsedArgs = setArgs(args)
+class fullCallingPipeline extends QScript {
+ qscript =>
-// very slow-to-run fast-to-write parse args function. Only worth changing if using lots of flags with lots of lookups.
+ @Argument(doc="contigIntervals", shortName="contigIntervals")
+ var contigIntervals: File = _
-def parseArgs(flag: String): String = {
- var retNext: Boolean = false
- for ( f <- unparsedArgs ) {
- if ( retNext ) {
- return f
- } else {
- if ( f.equals(flag) ) {
- retNext = true
- }
- }
- }
- return "None"
-}
+ @Argument(doc="numContigs", shortName="numContigs")
+ var numContigs: Int = _
-/////////////////////////////////////////////////
-// step one: we need to create a set of realigner targets, one for each bam file
-/////////////////////////////////////////////////
-// todo -- make me less of a hack that makes Khalid cry
-abstract class GatkFunctionLocal extends GatkFunction {
- if ( QScript.inputs("interval_list").size > 0 ) {
- this.intervals = QScript.inputs("interval_list").head
- } else {
- this.intervals = QScript.inputs("interval.list").head
- }
-}
+ @Argument(doc="project", shortName="project")
+ var project: String = _
-class RealignerTargetCreator extends GatkFunctionLocal {
- @Gather(classOf[SimpleTextGatherFunction])
- @Output(doc="Realigner targets")
- var realignerIntervals: File = _
+ @Input(doc="trigger", shortName="trigger", required=false)
+ var trigger: File = _
- def commandLine = gatkCommandLine("RealignerTargetCreator") + "-o %s".format(realignerIntervals)
-}
+ @Input(doc="refseqTable", shortName="refseqTable")
+ var refseqTable: File = _
-/////////////////////////////////////////////////
-// step two: we need to clean each bam file - gather will fix mates
-/////////////////////////////////////////////////
+ @Input(doc="dbsnpTable", shortName="dbsnpTable")
+ var dbsnpTable: File = _
-class IndelRealigner extends GatkFunction {
- @Input(doc="Intervals to clean")
- var intervalsToClean: File = _
- @Scatter(classOf[ContigScatterFunction])
- @Input(doc="Contig intervals")
- var contigIntervals: File = _
- @Gather(classOf[FixMatesGatherFunction])
- @Output(doc="Cleaned bam file")
- var cleanedBam: File = _
+ @Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
+ var picardFixMatesJar: File = _
- this.javaTmpDir = parseArgs("-tmpdir") // todo -- hack, move into script or something
+ @Input(doc="intervals")
+ var intervals: File = _
- override def freeze = {
- this.intervals = contigIntervals
- this.jobQueue = "long"
- super.freeze
- }
+ @Input(doc="bam files", shortName="I")
+ var bamFiles: List[File] = Nil
- def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals)
-}
+ @Input(doc="gatk jar")
+ var gatkJar: File = _
-/////////////////////////////////////////////////
-// step three: we need to call (multisample) over all bam files
-/////////////////////////////////////////////////
-
-class UnifiedGenotyper extends GatkFunctionLocal {
- @Input(doc="An optional trigger track (trigger emit will be set to 0)",required=false)
- var trigger: File = _
- @Input(doc="A list of comparison files for annotation",required=false)
- var compTracks: List[(String,File)] = Nil
- @Input(doc="Calling confidence level (may change depending on depth and number of samples)")
- var callConf: Int = _
- @Gather(classOf[SimpleTextGatherFunction])
- @Output(doc="raw vcf")
- var rawVCF: File = _
-
- // todo -- add input for comps, triggers, etc
-
- def commandLine = gatkCommandLine("UnifiedGenotyper") + "-G Standard -A MyHaplotypeScore -varout %s".format(rawVCF) +
- " -stand_emit_conf 10 -mmq 20 -mbq 20 -dt EXPERIMENTAL_BY_SAMPLE -dcov 200" +
- " -stand_call_conf %d".format(callConf) +
- ( if (trigger == null ) "" else " -trig_call_conf %d -trig_emit_conf 0 -B trigger,VCF,%s".format(callConf,trigger) ) +
- makeCompString
-
- def makeCompString = {
- var S: String = ""
- for ( tup <- compTracks ) {
- S += " -B comp%s,VCF,%s".format(tup._1,tup._2)
- }
- S
- }
-}
-
-/////////////////////////////////////////////////
-// step four: we need to call indels (multisample) over all bam files
-/////////////////////////////////////////////////
-
-class UnifiedGenotyperIndels extends GatkFunctionLocal {
- @Gather(classOf[SimpleTextGatherFunction])
- @Output(doc="indel vcf")
- var indelVCF: File = _
- // todo -- add inputs for the indel genotyper
-
- def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS".format(indelVCF)
-}
-
-/////////////////////////////////////////////////
-// step five: we need to filter variants on cluster and with indel mask
-/////////////////////////////////////////////////
-class VariantFiltration extends GatkFunctionLocal {
- @Input(doc="A VCF file to filter")
- var unfilteredVCF: File = _
- @Input(doc="An interval mask to use to filter indels")
- var indelMask: File = _
- @Input(doc="Filter names",required=false)
- var filterNames: List[String] = Nil
- @Input(doc="Filter expressions",required=false)
- var filterExpressions: List[String] = Nil
- @Output(doc="The input VCF file, but filtered")
- var filteredVCF: File = _
- // to do -- snp cluster args?
-
- def commandLine = gatkCommandLine("VariantFiltration") + "-B variant,VCF,%s -B mask,VCF,%s --maskName NearIndel --clusterWindowSize 20 --clusterSize 7 -o %s".format(unfilteredVCF,indelMask,filteredVCF) +
- "%s%s".format(repeat(" -filterName ",filterNames), repeat(" -filterExpression ",filterExpressions))
-}
-
-/////////////////////////////////////////////////
-// step six: we need to generate gaussian clusters with the optimizer
-/////////////////////////////////////////////////
-class GenerateVariantClusters extends GatkFunctionLocal {
- @Input(doc="A VCF that has been filtered for clusters and indels")
- var initialFilteredVCF: File = _
- @Output(doc="Variant cluster file generated from input VCF")
- var clusterFile: File = _
- // todo -- args for annotations?
- // todo -- args for resources (properties file)
-
- override def freeze = {
- // todo -- hacky change in memory limit -- fix this when more official roads to do this are in place
- this.memoryLimit = Some(8)
- this.jobQueue = "hugemem"
- super.freeze
- }
-
- def commandLine = gatkCommandLine("GenerateVariantClusters") + "-an QD -an SB -an MyHaplotypeScore -an HRun " +
- "-resources /humgen/gsa-scr1/chartl/sting/R -B input,VCF,%s -clusterFile %s".format(initialFilteredVCF,clusterFile)
-}
-
-/////////////////////////////////////////////////
-// step seven: we need to apply gaussian clusters to our variants
-/////////////////////////////////////////////////
-class ApplyGaussianClusters extends GatkFunctionLocal {
- @Input(doc="A VCF file to which to apply clusters")
- var inputVCF: File = _
- @Input(doc="A variant cluster file")
- var clusterFile: File = _
- @Output(doc="A quality-score recalibrated VCF file")
- var recalibratedVCF: File = _
- // todo -- inputs for Ti/Tv expectation and other things
-
- def commandLine = gatkCommandLine("VariantRecalibrator") + "--target_titv 2.1 -resources /humgen/gsa-scr1/chartl/sting/R " +
- "-B input,VCF,%s -clusterFile %s -output %s".format(inputVCF,clusterFile,recalibratedVCF)
-}
-
-/////////////////////////////////////////////////
-// step eight: we need to make tranches out of the recalibrated qualities
-/////////////////////////////////////////////////
-class ApplyVariantCuts extends GatkFunctionLocal {
- @Input(doc="A VCF file that has been recalibrated")
- var recalibratedVCF: File = _
- @Output(doc="A VCF file that has had tranches marked")
- var tranchedVCF: File = _
- @Output(doc="A tranch dat file")
- var tranchFile: File = _
- // todo -- fdr inputs, etc
-
- def commandLine = gatkCommandLine("ApplyVariantCuts") +
- "-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0".format(recalibratedVCF,tranchedVCF,tranchFile)
-}
-
-/////////////////////////////////////////////////
-// step nine: we need to annotate variants using the annotator [or maf, for now]
-/////////////////////////////////////////////////
-class GenomicAnnotator extends GatkFunctionLocal {
- @Input(doc="A VCF file to be annotated")
- var inputVCF: File = _
- @Input(doc="Refseq input table to use with the annotator")
- var refseqTable: File = _
- @Input(doc="Dbsnp input table to use with the annotator")
- var dbsnpTable: File = _
- @Gather(classOf[SimpleTextGatherFunction])
- @Output(doc="A genomically annotated VCF file")
- var annotatedVCF: File = _
-
- def commandLine = gatkCommandLine("GenomicAnnotator") + " -B variant,VCF,%s -B refseq,AnnotatorInputTable,%s -B dbsnp,AnnotatorInputTable,%s -vcf %s -s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet -BTI variant".format(inputVCF,refseqTable,dbsnpTable,annotatedVCF)
-}
-
-/////////////////////////////////////////////////
-// step ten: we need to evaluate variants with variant eval
-/////////////////////////////////////////////////
-class VariantEval extends GatkFunctionLocal {
- @Input(doc="An optimized vcf file to evaluate")
- var optimizedVCF: File = _
- @Input(doc="A hand-fitlered vcf file to evaluate")
- var handFilteredVCF: File = _
- @Output(doc="An evaluation file")
- var evalOutput: File = _
- // todo -- make comp tracks command-line arguments or properties
-
- def commandLine = gatkCommandLine("VariantEval") + "-B evalOptimized,VCF,%s -B evalHandFiltered,VCF,%s -E CountFunctionalClasses -E CompOverlap -E CountVariants -E TiTvVariantEvaluator -o %s".format(optimizedVCF,handFilteredVCF,evalOutput)
+trait CommandLineGATKArgs extends CommandLineGATK {
+ this.intervals = qscript.intervals
+ this.jarFile = qscript.gatkJar
}
// ------------ SETUP THE PIPELINE ----------- //
// todo -- the unclean and clean pipelines are the same, so the code can be condensed significantly
+ def script = {
+ val projectBase: String = qscript.project
+ val cleanedBase: String = projectBase + ".cleaned"
+ val uncleanedBase: String = projectBase + ".uncleaned"
// there are commands that use all the bam files
+ var cleanBamFiles = List.empty[File]
-val cleanSNPCalls = new UnifiedGenotyper
-val uncleanSNPCalls = new UnifiedGenotyper
-val cleanIndelCalls = new UnifiedGenotyperIndels
-val uncleanIndelCalls = new UnifiedGenotyperIndels
-
-for ( bam <- inputs("bam") ) {
+for ( bam <- bamFiles ) {
// put unclean bams in unclean genotypers
- uncleanSNPCalls.bamFiles :+= bam
- uncleanIndelCalls.bamFiles :+= bam
-
// in advance, create the extension files
val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list")
@@ -242,86 +63,129 @@ for ( bam <- inputs("bam") ) {
// create the cleaning commands
- val targetCreator = new RealignerTargetCreator
- targetCreator.bamFiles :+= bam
- targetCreator.realignerIntervals = indel_targets
+ val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs
+ targetCreator.input_file :+= bam
+ targetCreator.out = indel_targets
- val realigner = new IndelRealigner
- realigner.bamFiles = targetCreator.bamFiles
- realigner.contigIntervals = new File(parseArgs("-contigIntervals"))
- realigner.intervalsToClean = targetCreator.realignerIntervals
- realigner.scatterCount = parseArgs("-numContigs").toInt
- realigner.cleanedBam = cleaned_bam
+ val realigner = new IndelRealigner with CommandLineGATKArgs
+ realigner.input_file = targetCreator.input_file
+ realigner.intervals = qscript.contigIntervals
+ //realigner.targetIntervals = targetCreator.out
+ realigner.targetIntervals = targetCreator.out.getAbsolutePath
+ realigner.scatterCount = qscript.numContigs
+ realigner.out = cleaned_bam
+ realigner.scatterClass = classOf[ContigScatterFunction]
+ realigner.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = qscript.picardFixMatesJar }
+ realigner.jobQueue = "long"
// put clean bams in clean genotypers
- cleanSNPCalls.bamFiles :+= realigner.cleanedBam
- cleanIndelCalls.bamFiles :+= realigner.cleanedBam
+ cleanBamFiles :+= realigner.out
add(targetCreator,realigner)
}
+ endToEnd(uncleanedBase,bamFiles)
+ endToEnd(cleanedBase,cleanBamFiles)
+ }
-val projectBase: String = parseArgs("-project")
-val cleanedBase: String = projectBase + ".cleaned"
-val uncleanedBase: String = projectBase + ".uncleaned"
-
-def endToEnd(base: String, snps: UnifiedGenotyper, indels: UnifiedGenotyperIndels) = {
+def endToEnd(base: String, bamFiles: List[File]) = {
// step through the un-indel-cleaned graph:
// 1a. call snps and indels
- snps.rawVCF = new File(base+".vcf")
- snps.callConf = 30
- snps.trigger = new File(parseArgs("-trigger"))
+ val snps = new UnifiedGenotyper with CommandLineGATKArgs
+ snps.input_file = bamFiles
+ snps.group :+= "Standard"
+ snps.annotation :+= "MyHaplotypeScore"
+ snps.variants_out = new File(base+".vcf")
+ snps.standard_min_confidence_threshold_for_emitting = Some(10)
+ snps.min_mapping_quality_score = Some(20)
+ snps.min_base_quality_score = Some(20)
+ snps.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
+ snps.downsample_to_coverage = Some(200)
+ // todo -- add input for comps, triggers, etc
+ if (qscript.trigger != null) {
+ snps.trigger_min_confidence_threshold_for_calling = Some(30)
+ snps.rodBind :+= RodBind("trigger", "VCF", qscript.trigger)
+ }
// todo -- hack -- get this from the command line, or properties
- snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") )
- snps.compTracks :+= ( "comp1KG_ALL",new File(parseArgs("-trigger") ) )
+ snps.rodBind :+= RodBind( "comp1KG_CEU", "VCF", new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") )
+
+
+ // TODO: what is the 1KG_ALL track?
+ //snps.rodBind :+= RodBind( "comp1KG_ALL", "VCF", qscript.trigger )
+
+
snps.scatterCount = 100
- indels.indelVCF = new File(base+".indels.vcf")
+ val indels = new UnifiedGenotyper with CommandLineGATKArgs
+ indels.input_file = bamFiles
+ indels.variants_out = new File(base+".indels.vcf")
+ indels.genotype_model = Some(Model.INDELS)
indels.scatterCount = 100
+ // todo -- add inputs for the indel genotyper
// 1b. genomically annotate SNPs -- slow, but scatter it
- val annotated = new GenomicAnnotator
- annotated.inputVCF = snps.rawVCF
- annotated.refseqTable = new File(parseArgs("-refseqTable"))
- annotated.dbsnpTable = new File(parseArgs("-dbsnpTable"))
- annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf")
+ val annotated = new GenomicAnnotator with CommandLineGATKArgs
+ annotated.rodBind :+= RodBind("variant", "VCF", snps.variants_out)
+ annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
+ annotated.rodBind :+= RodBind("dbsnp", "AnnotatorInputTable", qscript.dbsnpTable)
+ annotated.vcfOutput = swapExt(snps.variants_out,".vcf",".annotated.vcf")
+ annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
+ annotated.rodToIntervalTrackName = "variant"
annotated.scatterCount = 100
// 2.a filter on cluster and near indels
- val masker = new VariantFiltration
- masker.unfilteredVCF = annotated.annotatedVCF
- masker.indelMask = indels.indelVCF
- masker.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".indel.masked.vcf")
+ val masker = new VariantFiltration with CommandLineGATKArgs
+ masker.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput)
+ masker.rodBind :+= RodBind("mask", "VCF", indels.variants_out)
+ masker.maskName = "NearIndel"
+ masker.clusterWindowSize = Some(20)
+ masker.clusterSize = Some(7)
+ masker.out = swapExt(annotated.vcfOutput,".vcf",".indel.masked.vcf")
+ // todo -- snp cluster args?
// 2.b hand filter with standard filter
- val handFilter = new VariantFiltration
- handFilter.unfilteredVCF = annotated.annotatedVCF
- handFilter.indelMask = indels.indelVCF
- handFilter.filterNames = List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun")
- handFilter.filterExpressions = List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"")
- handFilter.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".handfiltered.vcf")
+ val handFilter = new VariantFiltration with CommandLineGATKArgs
+ handFilter.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput)
+ handFilter.rodBind :+= RodBind("mask", "VCF", indels.variants_out)
+ handFilter.filterName ++= List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun")
+ handFilter.filterExpression ++= List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"")
+ handFilter.out = swapExt(annotated.vcfOutput,".vcf",".handfiltered.vcf")
// 3.i generate gaussian clusters on the masked vcf
- val clusters = new GenerateVariantClusters
- clusters.initialFilteredVCF = masker.filteredVCF
- clusters.clusterFile = swapExt(snps.rawVCF,".vcf",".cluster")
+ val clusters = new GenerateVariantClusters with CommandLineGATKArgs
+ clusters.rodBind :+= RodBind("input", "VCF", masker.out)
+ //clusters.clusterFile = swapExt(snps.variants_out,".vcf",".cluster")
+ val clusters_clusterFile = swapExt(snps.variants_out,".vcf",".cluster")
+ clusters.clusterFile = clusters_clusterFile.getAbsolutePath
+ clusters.memoryLimit = Some(8)
+ clusters.jobQueue = "hugemem"
+ // todo -- args for annotations?
+ // todo -- args for resources (properties file)
+ clusters.use_annotation ++= List("QD", "SB", "MyHaplotypeScore", "HRun")
+ clusters.path_to_resources = "/humgen/gsa-scr1/chartl/sting/R"
// 3.ii apply gaussian clusters to the masked vcf
- val recalibrate = new ApplyGaussianClusters
+ val recalibrate = new VariantRecalibrator with CommandLineGATKArgs
recalibrate.clusterFile = clusters.clusterFile
- recalibrate.inputVCF = masker.filteredVCF
- recalibrate.recalibratedVCF = swapExt(masker.filteredVCF,".vcf",".optimized.vcf")
+ recalibrate.rodBind :+= RodBind("input", "VCF", masker.out)
+ recalibrate.out = swapExt(masker.out,".vcf",".optimized.vcf")
+ // todo -- inputs for Ti/Tv expectation and other things
+ recalibrate.target_titv = Some(2.1)
// 3.iii apply variant cuts to the clusters
- val cut = new ApplyVariantCuts
- cut.recalibratedVCF = recalibrate.recalibratedVCF
- cut.tranchedVCF = swapExt(recalibrate.recalibratedVCF,".vcf",".tranched.vcf")
- cut.tranchFile = swapExt(recalibrate.recalibratedVCF,".vcf",".tranch")
+ val cut = new ApplyVariantCuts with CommandLineGATKArgs
+ cut.rodBind :+= RodBind("input", "VCF", recalibrate.out)
+ //cut.outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf")
+ //cut.tranchesFile = swapExt(recalibrate.out,".vcf",".tranch")
+ val cut_outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf")
+ val cut_tranchesFile = swapExt(recalibrate.out,".vcf",".tranch")
+ cut.outputVCFFile = cut_outputVCFFile.getAbsolutePath
+ cut.tranchesFile = cut_tranchesFile.getAbsolutePath
+ // todo -- fdr inputs, etc
+ cut.fdr_filter_level = Some(10)
// 4. Variant eval the cut and the hand-filtered vcf files
- val eval = new VariantEval
- eval.optimizedVCF = cut.tranchedVCF
- eval.handFilteredVCF = handFilter.filteredVCF
- eval.evalOutput = new File(base+".eval")
+ val eval = new VariantEval with CommandLineGATKArgs
+ eval.rodBind :+= RodBind("evalOptimized", "VCF", cut_outputVCFFile)
+ eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out)
+ // todo -- make comp tracks command-line arguments or properties
+ eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator")
+ eval.out = new File(base+".eval")
add(snps,indels,annotated,masker,handFilter,clusters,recalibrate,cut,eval)
}
-endToEnd(uncleanedBase,uncleanSNPCalls,uncleanIndelCalls)
-endToEnd(cleanedBase,cleanSNPCalls,cleanIndelCalls)
-
-setParams
-run
+}
diff --git a/scala/qscript/recalibrate.scala b/scala/qscript/recalibrate.scala
index 539fc27ef..df4cb5f57 100755
--- a/scala/qscript/recalibrate.scala
+++ b/scala/qscript/recalibrate.scala
@@ -1,73 +1,77 @@
-import java.io.File
-import org.broadinstitute.sting.queue.QScript._
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.QScript
import org.apache.commons.io.FilenameUtils;
-// Other imports can be added here
-val unusedArgs = setArgs(args)
+class recalibrate extends QScript {
+ @Input(doc="bamIn", shortName="I")
+ var bamIns: List[File] = Nil
+
+ @Argument(doc="scatter")
+ var scatter = false
-def runPipeline(arg: String) = {
- val scatter = arg == "scatter"
+ @Argument(doc="gatk jar file")
+ var gatkJarFile: File = _
- for (bamIn <- inputs(".bam")) {
+def script = {
+ for (bamIn <- bamIns) {
val root = bamIn.getPath()
val bamRoot = FilenameUtils.removeExtension(root);
val recalData = new File(bamRoot + ".recal_data.csv")
val recalBam = new File(bamRoot + ".recal.bam")
val recalRecalData = new File(bamRoot + ".recal.recal_data.csv")
//add(new CountCovariates(root, recalData, "-OQ"))
- val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam, "-OQ")
+ val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true }
if ( scatter ) {
tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list")
tableRecal.scatterCount = 25
}
add(tableRecal)
add(new Index(recalBam))
- add(new CountCovariates(recalBam, recalRecalData, "-nt 4"))
+ add(new CountCovariates(recalBam, recalRecalData) { num_threads = Some(4) })
add(new AnalyzeCovariates(recalData, new File(recalData.getPath() + ".analyzeCovariates")))
add(new AnalyzeCovariates(recalRecalData, new File(recalRecalData.getPath() + ".analyzeCovariates")))
}
}
-runPipeline(unusedArgs(0))
-
-// Populate parameters passed in via -P
-setParams
-
-// Run the pipeline
-run
-
def bai(bam: File) = new File(bam + ".bai")
-class Index(bamIn: File) extends GatkFunction {
- @Input(doc="foo") var bam = bamIn
- @Output(doc="foo") var bamIndex = bai(bamIn)
- memoryLimit = Some(1)
- override def dotString = "Index: %s".format(bamIn.getName)
- def commandLine = "samtools index %s".format(bam)
+class Index(bamIn: File) extends BamIndexFunction {
+ bamFile = bamIn
}
-class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction {
- @Input(doc="foo") var bam = bamIn
- @Input(doc="foo") var bamIndex = bai(bamIn)
- @Output(doc="foo") var recalData = recalDataIn
- memoryLimit = Some(4)
- override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, args)
- def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData)
+class CountCovariates(bamIn: File, recalDataIn: File) extends org.broadinstitute.sting.queue.extensions.gatk.CountCovariates {
+ this.jarFile = gatkJarFile
+ this.input_file :+= bamIn
+ this.recal_file = recalDataIn
+ this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod")
+ this.logging_level = "INFO"
+ this.max_reads_at_locus = Some(20000)
+ this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
+ this.memoryLimit = Some(4)
+
+ override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, if (this.num_threads.isDefined) "-nt " + this.num_threads else "")
}
-class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args: String = "") extends GatkFunction {
- @Input(doc="foo") var bamIn = bamInArg
- @Input(doc="foo") var recalData = recalDataIn
- @Gather(classOf[BamGatherFunction])
- @Output(doc="foo") var bamOut = bamOutArg
- override def dotString = "TableRecalibrate: %s => %s [args %s]".format(bamInArg.getName, bamOutArg.getName, args)
- memoryLimit = Some(2)
- def commandLine = gatkCommandLine("TableRecalibration") + args + " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut) // bamOut.getPath())
+class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) extends org.broadinstitute.sting.queue.extensions.gatk.TableRecalibration {
+ this.jarFile = gatkJarFile
+ this.input_file :+= bamInArg
+ this.recal_file = recalDataIn
+ this.output_bam = bamOutArg
+ this.logging_level = "INFO"
+ this.memoryLimit = Some(2)
+
+ override def dotString = "TableRecalibrate: %s => %s".format(bamInArg.getName, bamOutArg.getName, if (this.useOriginalQualities) " -OQ" else "")
}
-class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction {
- @Input(doc="foo") var recalData = recalDataIn
- memoryLimit = Some(4)
+class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends org.broadinstitute.sting.queue.extensions.gatk.AnalyzeCovariates {
+ this.jarFile = new File("/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar")
+ this.recal_file = recalDataIn
+ this.output_dir = outputDir.toString
+ this.path_to_resources = "/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/"
+ this.ignoreQ = Some(5)
+ this.path_to_Rscript = "/broad/tools/apps/R-2.6.0/bin/Rscript"
+ this.memoryLimit = Some(4)
+
override def dotString = "AnalyzeCovariates: %s".format(recalDataIn.getName)
- def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript".format(recalData, outputDir)
+}
}
diff --git a/scala/qscript/rpoplin/variantRecalibrator.scala b/scala/qscript/rpoplin/variantRecalibrator.scala
index 09e1e34f4..21a267465 100755
--- a/scala/qscript/rpoplin/variantRecalibrator.scala
+++ b/scala/qscript/rpoplin/variantRecalibrator.scala
@@ -1,7 +1,11 @@
-import org.broadinstitute.sting.queue.QScript._
-// Other imports can be added here
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.QScript
-setArgs(args)
+class variantRecalibrator extends QScript {
+ @Argument(doc="gatkJarFile")
+ var gatkJarFile: File = _
+
+ def script = {
val gList = List(30)
val sList = List(0.0001, 0.01)
@@ -13,66 +17,40 @@ for (g: Int <- gList) {
for (d: Double <- dList) {
for(b: Double <- bList) {
- // Using classes defined below
+ // Using classes defined by QueueGATKExtensions.jar
val gvc = new GenerateVariantClusters
val vr = new VariantRecalibrator
- gvc.maxGaussians = g
- gvc.shrinkage = s
- gvc.dirichlet = d
- gvc.clusterFile = new File("g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b))
- gvc.jobOutputFile = swapExt(gvc.clusterFile, ".cluster", ".gvc.out")
+ gvc.jarFile = gatkJarFile
+ gvc.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf"))
+ gvc.logging_level = "INFO"
+ gvc.intervalsString :+= "20"
+ gvc.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
+ gvc.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
+ gvc.maxGaussians = Some(g)
+ gvc.shrinkage = Some(s)
+ gvc.shrinkageFormat = "%.6f"
+ gvc.dirichlet = Some(d)
+ gvc.dirichletFormat = "%.6f"
+ gvc.clusterFile = "g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b)
+ gvc.jobOutputFile = new File(gvc.clusterFile.stripSuffix(".cluster") + ".gvc.out")
+ vr.jarFile = gatkJarFile
+ vr.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf"))
+ vr.logging_level = "INFO"
+ vr.intervalsString :+= "20"
+ vr.target_titv = Some(2.1)
+ vr.ignore_filter :+= "HARD_TO_VALIDATE"
+ vr.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
vr.clusterFile = gvc.clusterFile
- vr.jobOutputFile = swapExt(vr.clusterFile, ".cluster", ".vr.out")
- vr.backOff = b
+ vr.jobOutputFile = new File(vr.clusterFile.stripSuffix(".cluster") + ".vr.out")
+ vr.backOff = Some(b)
+ vr.backOffFormat = "%.2f"
add(gvc, vr)
}
}
}
}
-
-// Populate parameters passed in via -P
-setParams
-
-// Run the pipeline
-run
-
-
-
-// A very basic GATK UnifiedGenotyper
-class GenerateVariantClusters extends GatkFunction {
- var maxGaussians: Int = _
- var shrinkage: Double = _
- var dirichlet: Double = _
-
- @Output
- var clusterFile: File = _
-
- def commandLine = gatkCommandLine("GenerateVariantClusters") +
- "-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " +
- "-l INFO -L 20 -an QD -an SB -an HaplotypeScore -an HRun " +
- "-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " +
- "-mG %d ".format(maxGaussians) +
- "-shrinkage %.6f ".format(shrinkage) +
- "-dirichlet %.6f ".format(dirichlet) +
- "-clusterFile %s".format(clusterFile)
-}
-
-// A basic GATK VariantFiltration
-class VariantRecalibrator extends GatkFunction {
- var backOff: Double = _
-
- @Input
- var clusterFile: File = _
-
- def commandLine = gatkCommandLine("VariantRecalibrator") +
- "-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " +
- "-l INFO -L 20 -titv 2.1 " +
- "--ignore_filter HARD_TO_VALIDATE " +
- "-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " +
- "-backOff %.2f ".format(backOff) +
- "-clusterFile %s ".format(clusterFile) +
- "-output %s".format(clusterFile)
+ }
}
diff --git a/scala/qscript/unifiedgenotyper_example.properties b/scala/qscript/unifiedgenotyper_example.properties
deleted file mode 100644
index 4c4668db1..000000000
--- a/scala/qscript/unifiedgenotyper_example.properties
+++ /dev/null
@@ -1,7 +0,0 @@
-gatkJar = /humgen/gsa-hpprojects/GATK/bin/current/GenomeAnalysisTK.jar
-referenceFile = /path/to/reference.fasta
-dbsnp = /path/to/dbsnp
-intervals = /path/to/my.interval_list
-jobNamePrefix = Q
-memoryLimit = 2
-gatkLoggingLevel = INFO
diff --git a/scala/qscript/unifiedgenotyper_example.scala b/scala/qscript/unifiedgenotyper_example.scala
deleted file mode 100644
index d21a1ef6c..000000000
--- a/scala/qscript/unifiedgenotyper_example.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-import org.broadinstitute.sting.queue.QScript._
-
-setArgs(args)
-
-for (bam <- inputs("bam")) {
- val ug = new UnifiedGenotyper
- val vf = new VariantFiltration
- val ve = new GatkFunction {
- @Input(doc="vcf") var vcfFile: File = _
- @Output(doc="eval") var evalFile: File = _
- def commandLine = gatkCommandLine("VariantEval") + "-B eval,VCF,%s -o %s".format(vcfFile, evalFile)
- }
-
- // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh.
- ug.scatterCount = 3
- ug.bamFiles :+= bam
- ug.vcfFile = swapExt(bam, "bam", "unfiltered.vcf")
-
- vf.vcfInput = ug.vcfFile
- vf.vcfOutput = swapExt(bam, "bam", "filtered.vcf")
-
- ve.vcfFile = vf.vcfOutput
- ve.evalFile = swapExt(bam, "bam", "eval")
-
- add(ug, vf, ve)
-}
-
-setParams
-run
-
-
-class UnifiedGenotyper extends GatkFunction {
- @Output(doc="vcf")
- @Gather(classOf[SimpleTextGatherFunction])
- var vcfFile: File = _
- def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s".format(vcfFile)
-}
-
-class VariantFiltration extends GatkFunction {
- @Input(doc="input vcf")
- var vcfInput: File = _
-
- @Input(doc="filter names")
- var filterNames: List[String] = Nil
-
- @Input(doc="filter expressions")
- var filterExpressions: List[String] = Nil
-
- @Output(doc="output vcf")
- var vcfOutput: File = _
-
- def commandLine = gatkCommandLine("VariantFiltration") + "%s%s -B variant,VCF,%s -o %s"
- .format(repeat(" -filterName ", filterNames), repeat(" -filterExpression ", filterExpressions), vcfInput, vcfOutput)
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/QArguments.scala b/scala/src/org/broadinstitute/sting/queue/QArguments.scala
deleted file mode 100755
index 5c921231b..000000000
--- a/scala/src/org/broadinstitute/sting/queue/QArguments.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-package org.broadinstitute.sting.queue
-
-import collection.mutable.ListBuffer
-import collection.JavaConversions._
-import org.broadinstitute.sting.queue.util.Logging
-import org.broadinstitute.sting.utils.text.XReadLines
-import java.io.{FileInputStream, File}
-import java.util.Properties
-
-class QArguments(args: Array[String]) {
- var bsubAllJobs = false
- var bsubWaitJobs = false
- var dryRun = false
- val scripts = new ListBuffer[String]
- var inputPaths = List.empty[File]
- var properties = Map.empty[String, String]
-
- val userArgs = parseArgs(args)
-
- private def parseArgs(args: Array[String]) = {
- var filtered = new ListBuffer[String]
- filtered.appendAll(args)
-
- if (isFlagged(filtered, "-debug"))
- Logging.setDebug
- if (isFlagged(filtered, "-trace"))
- Logging.setTrace
- if (isFlagged(filtered, "-dry"))
- dryRun = true
- if (isFlagged(filtered, "-bsub"))
- bsubAllJobs = true
- if (isFlagged(filtered, "-bsubWait"))
- bsubWaitJobs = true
- for (arg <- getArgs(filtered, "-P"))
- addProperties(arg)
- for (arg <- getArgs(filtered, "-I"))
- addFile(arg)
- for (arg <- getArgs(filtered, "-S"))
- scripts.append(arg)
-
- List(filtered:_*)
- }
-
- private def isFlagged(filtered: ListBuffer[String], search: String) = {
- var found = false
- var index = 0
- while (0 <= index && index < filtered.size) {
- index = filtered.indexOf(search)
- if (index >= 0) {
- found = true
- filtered.remove(index)
- }
- }
- found
- }
-
- private def getArgs(filtered: ListBuffer[String], search: String) = {
- var found = new ListBuffer[String]
- var index = 0
- while (0 <= index && index < filtered.size) {
- index = filtered.indexOf(search)
- if (index >= 0) {
- found.append(filtered(index+1))
- filtered.remove(index, 2)
- }
- }
- found
- }
-
- def addProperties(arg: String) = {
- var file = new File(arg)
- if (arg.contains("=") && !file.exists) {
- val tokens = arg.split("=", 2)
- properties += tokens(0) -> tokens(1)
- } else if (arg.endsWith(".properties")) {
- if (!file.exists)
- throw new QException("File not found: " + file.getAbsolutePath)
- var props = new Properties
- props.load(new FileInputStream(file))
- for ((name, value) <- props)
- properties += name -> value
- } else {
- throw new QException("Invalid property: " + arg)
- }
- }
-
- def addFile(arg: String): Unit = {
- var file = new File(arg)
- inputPaths :+= file
- if (arg.endsWith(".list"))
- new XReadLines(file).iterator.foreach(addFile(_))
- }
-}
-
-object QArguments {
- def strip(filtered: ListBuffer[String], search: String) = {
- var index = 0
- while (0 <= index && index < filtered.size) {
- index = filtered.indexOf(search)
- if (index >= 0) {
- filtered.remove(index, 2)
- }
- }
- }
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index f59ea960b..1e4a05cad 100755
--- a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -1,47 +1,115 @@
package org.broadinstitute.sting.queue
-import tools.nsc.MainGenericRunner
-import org.broadinstitute.sting.queue.util.ClasspathUtils
-import collection.mutable.ListBuffer
-import org.broadinstitute.sting.queue.util.Logging
+import java.io.File
+import java.util.Arrays
+import org.broadinstitute.sting.queue.engine.QGraph
+import org.broadinstitute.sting.commandline.{ClassType, Input, Argument, CommandLineProgram}
+import org.broadinstitute.sting.queue.util.{Logging, ScalaCompoundArgumentTypeDescriptor}
-object QCommandLine extends Application with Logging {
- var usage = """usage: java -jar Queue.jar [-P name=value] [-P file.properties] [-I input.file] [-I input_files.list] [-bsub] [-bsubWait] [-dry] [-debug] -S pipeline.scala"""
+/**
+ * Entry point of Queue. Compiles and runs QScripts passed in to the command line.
+ */
+class QCommandLine extends CommandLineProgram with Logging {
+ @Input(fullName="script", shortName="S", doc="QScript scala file", required=true)
+ @ClassType(classOf[File])
+ private var scripts = List.empty[File]
- override def main(args: Array[String]) = {
- val qArgs: QArguments = try {
- new QArguments(args)
- } catch {
- case exception => {
- println(exception)
- println(usage)
- System.exit(-1)
- }
- null
+ @Argument(fullName="bsub_all_jobs", shortName="bsub", doc="Use bsub to submit jobs", required=false)
+ private var bsubAllJobs = false
+
+ @Argument(fullName="bsub_wait_jobs", shortName="bsubWait", doc="Wait for bsub submitted jobs before exiting", required=false)
+ private var bsubWaitJobs = false
+
+ @Argument(fullName="run_scripts", shortName="run", doc="Run QScripts", required=false)
+ private var run = false
+
+ @Argument(fullName="dot_graph", shortName="dot", doc="Outputs the queue graph to a .dot file. See: http://en.wikipedia.org/wiki/DOT_language", required=false)
+ private var queueDot: File = _
+
+ /**
+ * Takes the QScripts passed in, runs their script() methods, retrieves their generated
+ * functions, and then builds and runs a QGraph based on the dependencies.
+ */
+ def execute = {
+ val qGraph = new QGraph
+ qGraph.dryRun = !run
+ qGraph.bsubAllJobs = bsubAllJobs
+ qGraph.bsubWaitJobs = bsubWaitJobs
+
+ val scripts = qScriptManager.createScripts()
+ for (script <- scripts) {
+ logger.info("Scripting " + qScriptManager.getName(script.getClass.asSubclass(classOf[QScript])))
+ loadArgumentsIntoObject(script)
+ script.script
+ script.functions.foreach(qGraph.add(_))
+ logger.info("Added " + script.functions.size + " functions")
}
- logger.debug("starting")
-
- if (qArgs.scripts.size == 0) {
- println("Error: Missing script")
- println(usage)
- System.exit(-1)
+ logger.info("Binding functions")
+ qGraph.fillIn
+ if (queueDot != null) {
+ logger.info("Generating " + queueDot)
+ qGraph.renderToDot(queueDot)
}
- // NOTE: Something in MainGenericRunner is exiting the VM.
- if (qArgs.scripts.size != 1) {
- println("Error: Only one script can be run at a time")
- println(usage)
- System.exit(-1)
- }
+ logger.info("Running generated graph")
+ qGraph.run
+ logger.info("Done")
+ 0
+ }
- val newArgs = new ListBuffer[String]
- newArgs.appendAll(args)
- QArguments.strip(newArgs, "-S")
- newArgs.prepend("-nocompdaemon", "-classpath", ClasspathUtils.manifestAwareClassPath, qArgs.scripts.head)
- MainGenericRunner.main(newArgs.toArray)
+ /**
+ * Returns true as QScripts are located and compiled.
+ * @return true
+ */
+ override def canAddArgumentsDynamically = true
- // NOTE: This line is not reached because the MainGenericRunner exits the VM.
- logger.debug("exiting")
+ /**
+ * Returns the list of QScripts passed in via -S so that their
+ * arguments can be inspected before QScript.script is called.
+ * @return Array of QScripts passed in.
+ */
+ override def getArgumentSources =
+ qScriptManager.getValues.asInstanceOf[Array[Class[_]]]
+
+ /**
+ * Returns the name of a QScript
+ * @return The name of a QScript
+ */
+ override def getArgumentSourceName(source: Class[_]) =
+ qScriptManager.getName(source.asSubclass(classOf[QScript]))
+
+ /**
+ * Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections.
+ * @return a ScalaCompoundArgumentTypeDescriptor
+ */
+ override def getArgumentTypeDescriptors =
+ Arrays.asList(new ScalaCompoundArgumentTypeDescriptor)
+
+ /**
+   * Loads the QScripts passed in and returns a new QScriptManager that can be used to create them.
+ */
+ private lazy val qScriptManager = {
+ QScriptManager.loadScripts(scripts)
+ new QScriptManager
+ }
+}
+
+/**
+ * Entry point of Queue. Compiles and runs QScripts passed in to the command line.
+ */
+object QCommandLine {
+ /**
+ * Main.
+ * @param argv Arguments.
+ */
+ def main(argv: Array[String]) {
+ try {
+ CommandLineProgram.start(new QCommandLine, argv);
+ if (CommandLineProgram.result != 0)
+ System.exit(CommandLineProgram.result);
+ } catch {
+ case e: Exception => CommandLineProgram.exitSystemWithError(e)
+ }
}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/QScript.scala b/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 7fa24e9ee..a795f664c 100755
--- a/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -1,109 +1,41 @@
package org.broadinstitute.sting.queue
-import org.broadinstitute.sting.queue.function.CommandLineFunction
-import org.broadinstitute.sting.queue.engine.QGraph
+import org.broadinstitute.sting.queue.util.Logging
/**
- * Syntactic sugar for filling in a pipeline using a Scala script.
+ * Defines a Queue pipeline as a collection of CommandLineFunctions.
*/
-object QScript {
+trait QScript extends Logging {
// Type aliases so users don't have to import
type File = java.io.File
type Input = org.broadinstitute.sting.commandline.Input
type Output = org.broadinstitute.sting.commandline.Output
+ type Argument = org.broadinstitute.sting.commandline.Argument
+ type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection
type CommandLineFunction = org.broadinstitute.sting.queue.function.CommandLineFunction
- type GatkFunction = org.broadinstitute.sting.queue.function.gatk.GatkFunction
type ScatterGatherableFunction = org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
type Scatter = org.broadinstitute.sting.queue.function.scattergather.Scatter
type Gather = org.broadinstitute.sting.queue.function.scattergather.Gather
- type BamGatherFunction = org.broadinstitute.sting.queue.function.scattergather.BamGatherFunction
type SimpleTextGatherFunction = org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction
- // The arguments for executing pipelines
- private var qArgs: QArguments = _
-
- // A default pipeline. Can also use multiple 'new Pipeline()'
- private val pipeline = new Pipeline
+ /**
+ * Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method.
+ */
+ def script: Unit
/**
- * Initializes the QArguments and returns a list of the rest of the user args.
+ * The command line functions that will be executed for this QScript.
*/
- def setArgs(params: Array[String]) = {
- qArgs = new QArguments(params)
- qArgs.userArgs
- }
-
- /**
- * Returns a list of files that were specified with "-I " on the command line
- * or inside a .list file.
- */
- def inputs(extension: String) = qArgs.inputPaths.filter(_.getName.endsWith(extension))
+ var functions = List.empty[CommandLineFunction]
/**
* Exchanges the extension on a file.
*/
- def swapExt(file: File, oldExtension: String, newExtension: String) =
+ protected def swapExt(file: File, oldExtension: String, newExtension: String) =
new File(file.getName.stripSuffix(oldExtension) + newExtension)
/**
- * Adds one or more command line functions for dispatch later during run()
+ * Adds one or more command line functions to be run.
*/
- def add(functions: CommandLineFunction*) = pipeline.add(functions:_*)
-
- /**
- * Sets the @Input and @Output values for all the functions
- */
- def setParams(): Unit = pipeline.setParams()
-
- /**
- * Sets the @Input and @Output values for a single function
- */
- def setParams(function: CommandLineFunction): Unit = pipeline.setParams(function)
-
- /**
- * Executes functions that have been added to the pipeline.
- */
- def run() = pipeline.run()
-
-
- /**
- * Encapsulates a set of functions to run together.
- */
- protected class Pipeline {
- private var functions = List.empty[CommandLineFunction]
-
- /**
- * Adds one or more command line functions for dispatch later during run()
- */
- def add(functions: CommandLineFunction*) =
- this.functions :::= List(functions:_*)
-
- /**
- * Sets the @Input and @Output values for all the functions
- */
- def setParams(): Unit =
- for (function <- functions) setParams(function)
-
- /**
- * Sets the @Input and @Output values for a single function
- */
- def setParams(function: CommandLineFunction): Unit =
- function.properties = qArgs.properties
-
- /**
- * Executes functions that have been added to the pipeline.
- */
- def run() = {
- val qGraph = new QGraph
- qGraph.dryRun = qArgs.dryRun
- qGraph.bsubAllJobs = qArgs.bsubAllJobs
- qGraph.bsubWaitJobs = qArgs.bsubWaitJobs
- qGraph.properties = qArgs.properties
- for (function <- functions)
- qGraph.add(function)
- qGraph.fillIn
- qGraph.run
- qGraph.renderToDot(new File("queue.dot"))
- }
- }
+ def add(functions: CommandLineFunction*) = this.functions ++= List(functions:_*)
}
diff --git a/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
new file mode 100644
index 000000000..1b8a00d91
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
@@ -0,0 +1,163 @@
+package org.broadinstitute.sting.queue
+
+import org.broadinstitute.sting.utils.classloader.PluginManager
+import scala.tools.nsc.{Global, Settings}
+import scala.tools.nsc.io.PlainFile
+import org.broadinstitute.sting.queue.util.{Logging, ClasspathUtils, IOUtils}
+import collection.JavaConversions
+import java.io.File
+import scala.tools.nsc.reporters.AbstractReporter
+import java.lang.String
+import org.apache.log4j.Level
+import scala.tools.nsc.util.{FakePos, NoPosition, Position}
+
+/**
+ * Plugin manager for QScripts which loads QScripts into the current class loader.
+ */
+class QScriptManager extends PluginManager[QScript](classOf[QScript], "QScript", "Script") with Logging {
+
+ /**
+ * Returns the list of QScripts classes found in the classpath.
+ * @return QScripts classes found in the classpath.
+ */
+ def getValues = {
+ if (logger.isTraceEnabled) {
+ logger.trace(JavaConversions.asMap(this.pluginsByName)
+ .foreach{case (name, clazz) => "Found QScript %s: %s".format(name, clazz)})
+ }
+ JavaConversions.asIterable(this.pluginsByName.values).toArray
+ }
+
+ /**
+ * Creates the QScripts for all values found in the classpath.
+ * @return QScripts found in the classpath.
+ */
+ def createScripts() = getValues.map(_.newInstance.asInstanceOf[QScript])
+}
+
+/**
+ * Plugin manager for QScripts which loads QScripts into the current classloader.
+ */
+object QScriptManager extends Logging {
+ /**
+ * Compiles and loads the scripts in the files into the current classloader.
+ * Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
+ * @param scripts Scala classes to compile.
+ */
+ def loadScripts(scripts: List[File]) {
+ if (scripts.size > 0) {
+
+ val settings = new Settings((error: String) => logger.error(error))
+ val outdir = IOUtils.tempDir("Q-classes").getAbsoluteFile
+ settings.outdir.value = outdir.getPath
+
+ // Set the classpath to the current class path.
+ ClasspathUtils.manifestAwareClassPath.foreach(path => settings.classpath.append(path.getPath))
+
+ val reporter = new Log4JReporter(settings)
+
+ val compiler = new Global(settings, reporter)
+ val run = new compiler.Run
+
+ logger.debug("Compiling %s QScript%s".format(scripts.size, plural(scripts.size)))
+ logger.trace("Compilation directory: " + settings.outdir.value)
+ run.compileFiles(scripts.map(new PlainFile(_)))
+
+ reporter.printSummary()
+ if (reporter.hasErrors) {
+ val msg = "Compile failed with %d error%s".format(
+ reporter.ERROR.count, plural(reporter.ERROR.count))
+ throw new QException(msg)
+ }
+ else if (reporter.WARNING.count > 0)
+ logger.warn("Compile succeeded with %d warning%s".format(
+ reporter.WARNING.count, plural(reporter.WARNING.count)))
+ else
+ logger.debug("Compilation complete")
+
+ // Add the new compilation output directory to the classpath.
+ ClasspathUtils.addClasspath(outdir)
+ }
+ }
+
+ /**
+ * Returns the string "s" if x is greater than 1.
+ * @param x Value to test.
+ * @return "s" if x is greater than one else "".
+ */
+ private def plural(x: Int) = if (x > 1) "s" else ""
+
+ /**
+ * NSC (New Scala Compiler) reporter which logs to Log4J.
+ * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala
+ */
+ private class Log4JReporter(val settings: Settings) extends AbstractReporter {
+ def displayPrompt = throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.")
+
+ /**
+ * Displays the message at position with severity.
+ * @param posIn Position of the event in the file that generated the message.
+ * @param msg Message to display.
+ * @param severity Severity of the event.
+ */
+ def display(posIn: Position, msg: String, severity: Severity) = {
+ severity.count += 1
+ val level = severity match {
+ case INFO => Level.INFO
+ case WARNING => Level.WARN
+ case ERROR => Level.ERROR
+ }
+ val pos = if (posIn eq null) NoPosition
+ else if (posIn.isDefined) posIn.inUltimateSource(posIn.source)
+ else posIn
+ pos match {
+ case FakePos(fmsg) =>
+ printMessage(level, fmsg+" "+msg)
+ case NoPosition =>
+ printMessage(level, msg)
+ case _ =>
+ val buf = new StringBuilder(msg)
+ val file = pos.source.file
+ printMessage(level, file.name+":"+pos.line+": "+msg)
+ printSourceLine(level, pos)
+ }
+ }
+
+ /**
+ * Prints a summary count of warnings and errors.
+ */
+ def printSummary() = {
+ if (WARNING.count > 0)
+ printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found")
+ if (ERROR.count > 0)
+ printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found")
+ }
+
+ /**
+ * Prints the source code line of an event followed by a pointer within the line to the error.
+ * @param level Severity level.
+ * @param pos Position in the file of the event.
+ */
+ private def printSourceLine(level: Level, pos: Position) {
+ printMessage(level, pos.lineContent.stripLineEnd)
+ printColumnMarker(level, pos)
+ }
+
+ /**
+ * Prints the column marker of the given position.
+ * @param level Severity level.
+ * @param pos Position in the file of the event.
+ */
+ private def printColumnMarker(level: Level, pos: Position) =
+ if (pos.isDefined) { printMessage(level, " " * (pos.column - 1) + "^") }
+
+ /**
+ * Prints the message at the severity level.
+ * @param level Severity level.
+ * @param message Message content.
+ */
+ private def printMessage(level: Level, message: String) = {
+ logger.log(level, message)
+ }
+ }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala
deleted file mode 100755
index da23d3766..000000000
--- a/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala
+++ /dev/null
@@ -1,20 +0,0 @@
-package org.broadinstitute.sting.queue.engine
-
-import org.broadinstitute.sting.queue.util.{Logging, ProcessUtils}
-import org.broadinstitute.sting.queue.function.CommandLineFunction
-
-/**
- * Runs jobs one at a time locally
- */
-trait CommandLineRunner extends Logging {
- def run(function: CommandLineFunction, qGraph: QGraph) = {
- if (logger.isDebugEnabled) {
- logger.debug(function.commandDirectory + " > " + function.commandLine)
- } else {
- logger.info(function.commandLine)
- }
-
- if (!qGraph.dryRun)
- ProcessUtils.runCommandAndWait(function.commandLine, function.commandDirectory)
- }
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala
index 88f48c1e5..d1d80d99b 100755
--- a/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala
+++ b/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala
@@ -1,22 +1,38 @@
package org.broadinstitute.sting.queue.engine
import collection.JavaConversions._
-import org.broadinstitute.sting.queue.function.{DispatchFunction, QFunction}
+import org.broadinstitute.sting.queue.function.{CommandLineFunction, QFunction}
import scala.collection.immutable.ListSet
+/**
+ * Dispatches jobs to a compute cluster.
+ */
trait DispatchJobRunner {
+ /** Type of the job. */
type DispatchJobType
- private var dispatchJobs = Map.empty[DispatchFunction, DispatchJobType]
+ /** An internal cache of all the jobs that have run by command line function. */
+ private var dispatchJobs = Map.empty[CommandLineFunction, DispatchJobType]
+ /** An internal list of functions that have no other dependencies. */
private var waitJobsByGraph = Map.empty[QGraph, ListSet[DispatchJobType]]
/**
* Dispatches a function to the queue and returns immediately, unless the function is a DispatchWaitFunction
* in which case it waits for all other terminal functions to complete.
+ * @param function Command to run.
+   * @param qGraph Graph that holds the job and indicates whether this is a dry run.
*/
- def dispatch(function: DispatchFunction, qGraph: QGraph)
+ def dispatch(function: CommandLineFunction, qGraph: QGraph)
- protected def addJob(function: DispatchFunction, qGraph: QGraph,
- dispatchJob: DispatchJobType, previousJobs: List[DispatchJobType]) = {
+ /**
+ * Adds the job to the internal cache of previous jobs and removes the previous jobs that
+ * the job was dependent on from the list of function that have no dependencies.
+ * @param function CommandLineFunction to add to the list.
+ * @param qGraph Current qGraph being iterated over.
+ * @param dispatchJob The job that is being added to the cache.
+ * @param previousJobs The previous jobs that the job was dependent one.
+ */
+ protected def addJob(function: CommandLineFunction, qGraph: QGraph,
+ dispatchJob: DispatchJobType, previousJobs: Iterable[DispatchJobType]) = {
dispatchJobs += function -> dispatchJob
var waitJobs = getWaitJobs(qGraph)
for (previousJob <- previousJobs)
@@ -26,7 +42,10 @@ trait DispatchJobRunner {
}
/**
- * Walks up the graph looking for the previous LsfJobs
+ * Walks up the graph looking for the previous LsfJobs.
+ * @param function Function to examine for a previous command line job.
+ * @param qGraph The graph that contains the jobs.
+ * @return A list of prior jobs.
*/
protected def previousJobs(function: QFunction, qGraph: QGraph) : List[DispatchJobType] = {
var previous = List.empty[DispatchJobType]
@@ -36,10 +55,10 @@ trait DispatchJobRunner {
incomingEdge match {
// Stop recursing when we find a job along the edge and return its job id
- case dispatchFunction: DispatchFunction => previous :+= dispatchJobs(dispatchFunction)
+ case dispatchFunction: CommandLineFunction => previous :+= dispatchJobs(dispatchFunction)
// For any other type of edge find the LSF jobs preceding the edge
- case qFunction: QFunction => previous = previousJobs(qFunction, qGraph) ::: previous
+ case qFunction: QFunction => previous ++= previousJobs(qFunction, qGraph)
}
}
previous
@@ -47,10 +66,25 @@ trait DispatchJobRunner {
/**
* Returns a set of jobs that have no following jobs in the graph.
+ * @param qGraph The graph that contains the jobs.
+ * @return ListSet[DispatchJobType] of previous jobs that have no dependent jobs.
*/
protected def getWaitJobs(qGraph: QGraph) = {
if (!waitJobsByGraph.contains(qGraph))
waitJobsByGraph += qGraph -> ListSet.empty[DispatchJobType]
waitJobsByGraph(qGraph)
}
+
+ /**
+ * Builds a command line that can be run to force an automount of the directories.
+ * @param function Function whose jobDirectories should be mounted.
+ * @return A single-quoted "cd <dir> [ && cd <dir> ... ]" command.
+ */
+ protected def mountCommand(function: CommandLineFunction) = {
+ val dirs = function.jobDirectories
+ if (dirs.size > 0)
+ Some("\'" + dirs.mkString("cd ", " && cd ", "") + "\'")
+ else
+ None
+ }
}
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala
index dc0780527..d49534a24 100644
--- a/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala
+++ b/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala
@@ -1,55 +1,76 @@
package org.broadinstitute.sting.queue.engine
-import collection.JavaConversions._
-import edu.mit.broad.core.lsf.LocalLsfJob
-import java.util.ArrayList
-import org.broadinstitute.sting.queue.util.Logging
-import org.broadinstitute.sting.queue.function.{DispatchWaitFunction, DispatchFunction}
+import org.broadinstitute.sting.queue.function.{CommandLineFunction, DispatchWaitFunction}
+import org.broadinstitute.sting.queue.util.{IOUtils, LsfJob, Logging}
+/**
+ * Runs jobs on an LSF compute cluster.
+ */
trait LsfJobRunner extends DispatchJobRunner with Logging {
- type DispatchJobType = LocalLsfJob
+ type DispatchJobType = LsfJob
- def dispatch(function: DispatchFunction, qGraph: QGraph) = {
- val job = new LocalLsfJob
- job.setName(function.jobName)
- job.setOutputFile(function.jobOutputFile)
- job.setErrFile(function.jobErrorFile)
- job.setWorkingDir(function.commandDirectory)
- job.setProject(function.jobProject)
- job.setQueue(function.jobQueue)
- job.setCommand(function.commandLine)
+ /**
+ * Dispatches the function on the LSF cluster.
+ * @param function Command to run.
+ * @param qGraph graph that holds the job, and if this is a dry run.
+ */
+ def dispatch(function: CommandLineFunction, qGraph: QGraph) = {
+ val job = new LsfJob
+ job.name = function.jobName
+ job.outputFile = function.jobOutputFile
+ job.errorFile = function.jobErrorFile
+ job.project = function.jobProject
+ job.queue = function.jobQueue
+ job.command = function.commandLine
- var extraArgs = List("-r")
+ if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory))
+ job.workingDir = function.commandDirectory
+
+ if (function.jobRestartable)
+ job.extraBsubArgs :+= "-r"
if (function.memoryLimit.isDefined)
- extraArgs :::= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
+ job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]")
- val previous =
+ val previous: Iterable[LsfJob] =
if (function.isInstanceOf[DispatchWaitFunction]) {
- extraArgs :+= "-K"
- getWaitJobs(qGraph).toList
+ job.waitForCompletion = true
+ getWaitJobs(qGraph)
} else {
previousJobs(function, qGraph)
}
- if (previous.size > 0)
- extraArgs :::= List("-w", dependencyExpression(previous))
+ mountCommand(function) match {
+ case Some(command) => job.preExecCommand = command
+ case None => /* ignore */
+ }
- job.setExtraBsubArgs(new ArrayList(extraArgs))
+ if (previous.size > 0)
+ job.extraBsubArgs ++= List("-w", dependencyExpression(previous, function.jobRunOnlyIfPreviousSucceed))
addJob(function, qGraph, job, previous)
if (logger.isDebugEnabled) {
- logger.debug(function.commandDirectory + " > " + job.getBsubCommand.mkString(" "))
+ logger.debug(function.commandDirectory + " > " + job.bsubCommand.mkString(" "))
} else {
- logger.info(job.getBsubCommand.mkString(" "))
+ logger.info(job.bsubCommand.mkString(" "))
}
if (!qGraph.dryRun)
- job.start
+ job.run
}
- private def dependencyExpression(jobs: List[LocalLsfJob]) = {
- jobs.toSet[LocalLsfJob].map(_.getName).mkString("ended(\"", "\") && ended(\"", "\")")
+ /**
+ * Returns the dependency expression for the prior jobs.
+ * @param jobs Previous jobs this job is dependent on.
+ * @param runOnSuccess Run the job only if the previous jobs succeed.
+ * @return The dependency expression for the prior jobs.
+ */
+ private def dependencyExpression(jobs: Iterable[LsfJob], runOnSuccess: Boolean) = {
+ val jobNames = jobs.toSet[LsfJob].map(_.name)
+ if (runOnSuccess)
+ jobNames.mkString("done(\"", "\") && done(\"", "\")")
+ else
+ jobNames.mkString("ended(\"", "\") && ended(\"", "\")")
}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
index 2670e82b3..9e1d68d86 100755
--- a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
+++ b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
@@ -6,22 +6,27 @@ import scala.collection.JavaConversions
import scala.collection.JavaConversions._
import org.broadinstitute.sting.queue.function.{MappingFunction, CommandLineFunction, QFunction}
import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
-import org.broadinstitute.sting.queue.util.{CollectionUtils, Logging}
+import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.QException
import org.jgrapht.alg.CycleDetector
import org.jgrapht.EdgeFactory
import org.jgrapht.ext.DOTExporter
-import org.broadinstitute.sting.queue.function.DispatchFunction
-import org.broadinstitute.sting.queue.function.gatk.GatkFunction
+import java.io.File
+/**
+ * The internal dependency tracker between sets of function input and output files.
+ */
class QGraph extends Logging {
var dryRun = true
var bsubAllJobs = false
var bsubWaitJobs = false
- var properties = Map.empty[String, String]
val jobGraph = newGraph
def numJobs = JavaConversions.asSet(jobGraph.edgeSet).filter(_.isInstanceOf[CommandLineFunction]).size
+ /**
+ * Adds a QScript created CommandLineFunction to the graph.
+ * @param command Function to add to the graph.
+ */
def add(command: CommandLineFunction) {
addFunction(command)
}
@@ -49,22 +54,30 @@ class QGraph extends Logging {
jobGraph.removeAllVertices(jobGraph.vertexSet.filter(isOrphan(_)))
}
+ /**
+ * Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph.
+ */
def run = {
var isReady = true
+ var totalMissingValues = 0
for (function <- JavaConversions.asSet(jobGraph.edgeSet)) {
function match {
case cmd: CommandLineFunction =>
- val missingValues = cmd.missingValues
- if (missingValues.size > 0) {
- isReady = false
- logger.error("Missing values for function: %s".format(cmd.commandLine))
- for (missing <- missingValues)
+ val missingFieldValues = cmd.missingFields
+ if (missingFieldValues.size > 0) {
+ totalMissingValues += missingFieldValues.size
+ logger.error("Missing %s values for function: %s".format(missingFieldValues.size, cmd.commandLine))
+ for (missing <- missingFieldValues)
logger.error(" " + missing)
}
case _ =>
}
}
+ if (totalMissingValues > 0) {
+ isReady = false
+ }
+
val detector = new CycleDetector(jobGraph)
if (detector.detectCycles) {
logger.error("Cycles were detected in the graph:")
@@ -75,11 +88,29 @@ class QGraph extends Logging {
if (isReady || this.dryRun)
(new TopologicalJobScheduler(this) with LsfJobRunner).runJobs
+
+ if (totalMissingValues > 0) {
+ logger.error("Total missing values: " + totalMissingValues)
+ }
+
+ if (isReady && this.dryRun) {
+ logger.info("Dry run completed successfully!")
+ logger.info("Re-run with \"-run\" to execute the functions.")
+ }
}
+ /**
+ * Creates a new graph where if new edges are needed (for cyclic dependency checking) they can be automatically created using a generic MappingFunction.
+ * @return A new graph
+ */
private def newGraph = new SimpleDirectedGraph[QNode, QFunction](new EdgeFactory[QNode, QFunction] {
- def createEdge(input: QNode, output: QNode) = new MappingFunction(input.items, output.items)})
+ def createEdge(input: QNode, output: QNode) = new MappingFunction(input.files, output.files)})
+ /**
+ * Adds a generic QFunction to the graph.
+ * If the function is scatterable and the jobs request bsub, splits the job into parts and adds the parts instead.
+ * @param f Generic QFunction to add to the graph.
+ */
private def addFunction(f: QFunction): Unit = {
try {
f.freeze
@@ -113,31 +144,53 @@ class QGraph extends Logging {
}
}
- private def addCollectionInputs(value: Any): Unit = {
- CollectionUtils.foreach(value, (item, collection) =>
- addMappingEdge(item, collection))
+ /**
+ * Checks to see if the set of files has more than one file and if so adds input mappings between the set and the individual files.
+ * @param files Set to check.
+ */
+ private def addCollectionInputs(files: Set[File]): Unit = {
+ if (files.size > 1)
+ for (file <- files)
+ addMappingEdge(Set(file), files)
}
- private def addCollectionOutputs(value: Any): Unit = {
- CollectionUtils.foreach(value, (item, collection) =>
- addMappingEdge(collection, item))
+ /**
+ * Checks to see if the set of files has more than one file and if so adds output mappings between the individual files and the set.
+ * @param files Set to check.
+ */
+ private def addCollectionOutputs(files: Set[File]): Unit = {
+ if (files.size > 1)
+ for (file <- files)
+ addMappingEdge(files, Set(file))
}
- private def addMappingEdge(input: Any, output: Any) = {
- val inputSet = asSet(input)
- val outputSet = asSet(output)
- val hasEdge = inputSet == outputSet ||
- jobGraph.getEdge(QNode(inputSet), QNode(outputSet)) != null ||
- jobGraph.getEdge(QNode(outputSet), QNode(inputSet)) != null
+ /**
+ * Adds a directed graph edge between the input set and the output set if there isn't a direct relationship between the two nodes already.
+ * @param input Input set of files.
+ * @param output Output set of files.
+ */
+ private def addMappingEdge(input: Set[File], output: Set[File]) = {
+ val hasEdge = input == output ||
+ jobGraph.getEdge(QNode(input), QNode(output)) != null ||
+ jobGraph.getEdge(QNode(output), QNode(input)) != null
if (!hasEdge)
- addFunction(new MappingFunction(inputSet, outputSet))
+ addFunction(new MappingFunction(input, output))
}
- private def asSet(value: Any): Set[Any] = if (value.isInstanceOf[Set[_]]) value.asInstanceOf[Set[Any]] else Set(value)
-
+ /**
+ * Returns true if the edge is an internal mapping edge.
+ * @param edge Edge to check.
+ * @return true if the edge is an internal mapping edge.
+ */
private def isMappingEdge(edge: QFunction) =
edge.isInstanceOf[MappingFunction]
+ /**
+ * Returns true if the edge is mapping edge that is not needed because it does
+ * not direct input or output from a user generated CommandLineFunction.
+ * @param edge Edge to check.
+ * @return true if the edge is not needed in the graph.
+ */
private def isFiller(edge: QFunction) = {
if (isMappingEdge(edge)) {
if (jobGraph.outgoingEdgesOf(jobGraph.getEdgeTarget(edge)).size == 0)
@@ -148,9 +201,19 @@ class QGraph extends Logging {
} else false
}
+ /**
+ * Returns true if the node is not connected to any edges.
+ * @param node Node (set of files) to check
+ * @return true if this set of files is not needed in the graph.
+ */
private def isOrphan(node: QNode) =
(jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0
+ /**
+ * Outputs the graph to a .dot file.
+ * http://en.wikipedia.org/wiki/DOT_language
+ * @param file Path to output the .dot file.
+ */
def renderToDot(file: java.io.File) = {
val out = new java.io.FileWriter(file)
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala b/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala
index 01d3b814c..480c1c88f 100644
--- a/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala
+++ b/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala
@@ -1,6 +1,9 @@
package org.broadinstitute.sting.queue.engine
+import java.io.File
+
/**
* Represents a state between QFunctions the directed acyclic QGraph
+ * @param files The set of files that represent this node state.
*/
-case class QNode (val items: Set[Any])
+case class QNode (val files: Set[File])
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala
new file mode 100755
index 000000000..abffa3c08
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala
@@ -0,0 +1,31 @@
+package org.broadinstitute.sting.queue.engine
+
+import org.broadinstitute.sting.queue.util.{Logging, ShellJob}
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+
+/**
+ * Runs jobs one at a time locally
+ */
+trait ShellJobRunner extends Logging {
+ /**
+ * Runs the function on the local shell.
+ * @param function Command to run.
+ * @param qGraph graph that holds the job, and if this is a dry run.
+ */
+ def run(function: CommandLineFunction, qGraph: QGraph) = {
+ val job = new ShellJob
+ job.command = function.commandLine
+ job.workingDir = function.commandDirectory
+ job.outputFile = function.jobOutputFile
+ job.errorFile = function.jobErrorFile
+
+ if (logger.isDebugEnabled) {
+ logger.debug(function.commandDirectory + " > " + function.commandLine)
+ } else {
+ logger.info(function.commandLine)
+ }
+
+ if (!qGraph.dryRun)
+ job.run
+ }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala b/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala
index 0831e184f..23a69846f 100755
--- a/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala
+++ b/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala
@@ -7,21 +7,29 @@ import org.broadinstitute.sting.queue.util.Logging
import org.broadinstitute.sting.queue.function._
/**
- * Loops over the job graph running jobs as the edges are traversed
+ * Loops over the job graph running jobs as the edges are traversed.
+ * @param qGraph The graph that contains the jobs to be run.
*/
abstract class TopologicalJobScheduler(private val qGraph: QGraph)
- extends CommandLineRunner with DispatchJobRunner with Logging {
+ extends ShellJobRunner with DispatchJobRunner with Logging {
protected val iterator = new TopologicalOrderIterator(qGraph.jobGraph)
iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QFunction] {
+ /**
+ * As each edge is traversed, either dispatch the job or run it locally.
+ * @param event Event holding the edge that was passed.
+ */
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QFunction]) = event.getEdge match {
- case f: DispatchFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph)
+ case f: CommandLineFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph)
case f: CommandLineFunction => run(f, qGraph)
case f: MappingFunction => /* do nothing for mapping functions */
}
})
+ /**
+ * Runs the jobs by traversing the graph.
+ */
def runJobs = {
logger.info("Number of jobs: %s".format(qGraph.numJobs))
if (logger.isTraceEnabled)
@@ -39,7 +47,6 @@ abstract class TopologicalJobScheduler(private val qGraph: QGraph)
if (qGraph.bsubAllJobs && qGraph.bsubWaitJobs) {
logger.info("Waiting for jobs to complete.")
val wait = new DispatchWaitFunction
- wait.properties = qGraph.properties
wait.freeze
dispatch(wait, qGraph)
}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala
new file mode 100644
index 000000000..13ce477c4
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala
@@ -0,0 +1,17 @@
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import org.broadinstitute.sting.queue.function.JarCommandLineFunction
+import org.broadinstitute.sting.commandline.Argument
+import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
+
+/**
+ * Merges BAM files using Picard's MergeSamFiles.jar.
+ * At the Broad the jar can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the Broad see http://picard.sourceforge.net/
+ */
+class BamGatherFunction extends GatherFunction with JarCommandLineFunction {
+ @Argument(doc="Compression level 1-9", required=false)
+ var compressionLevel: Option[Int] = None
+
+ override def commandLine = super.commandLine + "%s%s%s".format(
+ optional(" COMPRESSION_LEVEL=", compressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala
new file mode 100644
index 000000000..82ef24b2d
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala
@@ -0,0 +1,35 @@
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import java.io.File
+import org.broadinstitute.sting.commandline.{Argument, Output, Input}
+
+/**
+ * Indexes a BAM file.
+ * By default uses samtools index.
+ * The syntax of the script must be:
+ *
+ * <bamIndexScript> <bamFile> <bamFileIndex>
+class BamIndexFunction extends CommandLineFunction {
+ @Argument(doc="BAM file script")
+ var bamIndexScript: String = "samtools index"
+
+ @Input(doc="BAM file to index")
+ var bamFile: File = _
+
+ @Output(doc="BAM file index to output", required=false)
+ var bamFileIndex: File = _
+
+ /**
+ * Sets the bam file index to the bam file name + ".bai".
+ */
+ override def freezeFieldValues = {
+ super.freezeFieldValues
+ if (bamFileIndex == null && bamFile != null)
+ bamFileIndex = new File(bamFile.getPath + ".bai")
+ }
+
+ def commandLine = "%s %s %s".format(bamIndexScript, bamFile, bamFileIndex)
+
+ override def dotString = "Index: %s".format(bamFile.getName)
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala
new file mode 100755
index 000000000..01de9c8f9
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala
@@ -0,0 +1,8 @@
+package org.broadinstitute.sting.queue.extensions.gatk
+
+/**
+ * Splits intervals by contig instead of evenly.
+ */
+class ContigScatterFunction extends IntervalScatterFunction {
+ splitIntervalsScript = "splitIntervalsByContig.py"
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala
new file mode 100644
index 000000000..dfb94d48f
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala
@@ -0,0 +1,16 @@
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import org.broadinstitute.sting.commandline.Argument
+import org.broadinstitute.sting.queue.function.scattergather.ScatterFunction
+
+/**
+ * An interval scatter function that allows the script to be swapped out.
+ * The syntax of the script must be:
+ * <splitIntervalsScript> <originalInput> <scatterPart> [.. <scatterPart>]
+ */
+class IntervalScatterFunction extends ScatterFunction {
+ @Argument(doc="Interval split script")
+ var splitIntervalsScript: String = "splitIntervals.sh"
+
+ def commandLine = "%s %s%s".format(splitIntervalsScript, originalInput, repeat(" ", scatterParts))
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala
new file mode 100644
index 000000000..bce054ba0
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala
@@ -0,0 +1,14 @@
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import java.io.File
+import org.broadinstitute.sting.queue.function.FileProvider
+
+/**
+ * Used to provide -B rodBinding arguments to the GATK.
+ */
+case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider {
+ require(trackName != null, "RodBind trackName cannot be null")
+ require(trackType != null, "RodBind trackType cannot be null")
+ require(file != null, "RodBind file cannot be null")
+ override def toString = "%s,%s,%s".format(trackName, trackType, file)
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
index d94ac998b..847103f10 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
@@ -1,74 +1,402 @@
package org.broadinstitute.sting.queue.function
import org.broadinstitute.sting.queue.util._
-import java.lang.reflect.Field
import java.lang.annotation.Annotation
-import org.broadinstitute.sting.commandline.{Input, Output}
+import org.broadinstitute.sting.commandline._
+import java.io.File
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.function.scattergather.{SimpleTextGatherFunction, Gather}
+import java.lang.management.ManagementFactory
+import org.broadinstitute.sting.queue.QException
-trait CommandLineFunction extends InputOutputFunction with DispatchFunction {
- var properties = Map.empty[String, String]
+/**
+ * A command line that will be run in a pipeline.
+ */
+trait CommandLineFunction extends QFunction with Logging {
+ def commandLine: String
- def inputFieldsWithValues = inputFields.filter(hasFieldValue(_))
- def outputFieldsWithValues = outputFields.filter(hasFieldValue(_))
+ /** Upper memory limit */
+ var memoryLimit: Option[Int] = None
+
+ /** Whether a job is restartable */
+ var jobRestartable = true
+
+ /** Directory to run the command in. */
+ var commandDirectory: File = IOUtils.CURRENT_DIR
+
+ /** Prefix for automatic job name creation */
+ var jobNamePrefix: String = CommandLineFunction.processNamePrefix
+
+ /** The name of the job */
+ var jobName: String = _
+
+ /** Job project to run the command */
+ var jobProject = "Queue"
+
+ /** Job queue to run the command */
+ var jobQueue = "broad"
+
+ /** Temporary directory to write any files */
+ var jobTempDir: File = new File(System.getProperty("java.io.tmpdir"))
+
+ /** If true this function will run only if the jobs it is dependent on succeed. */
+ var jobRunOnlyIfPreviousSucceed = true
+
+ /** File to redirect any output. Defaults to <jobName>.out */
+ @Output(doc="File to redirect any output", required=false)
+ @Gather(classOf[SimpleTextGatherFunction])
+ var jobOutputFile: File = _
+
+ /** File to redirect any errors. Defaults to .out */
+ @Output(doc="File to redirect any errors", required=false)
+ @Gather(classOf[SimpleTextGatherFunction])
+ var jobErrorFile: File = _
+
+ /** The complete list of fields on this CommandLineFunction. */
+ lazy val functionFields: List[ArgumentSource] = ParsingEngine.extractArgumentSources(this.getClass).toList
+ /** The @Input fields on this CommandLineFunction. */
+ lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input]))
+ /** The @Output fields on this CommandLineFunction. */
+ lazy val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output]))
+ /** The @Argument fields on this CommandLineFunction. */
+ lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument]))
/**
- * Sets parameters from the arg map.
+ * Returns set of directories required to run the command.
+ * @return Set of directories required to run the command.
*/
- override def freeze = {
- for ((name, value) <- properties) addOrUpdateWithStringValue(name, value)
+ def jobDirectories = {
+ var dirs = Set.empty[File]
+ dirs += commandDirectory
+ if (jobTempDir != null)
+ dirs += jobTempDir
+ dirs ++= inputs.map(_.getParentFile)
+ dirs ++= outputs.map(_.getParentFile)
+ dirs
+ }
+
+ /**
+ * Returns the input files for this function.
+ * @return Set[File] inputs for this function.
+ */
+ def inputs = getFieldFiles(inputFields)
+
+ /**
+ * Returns the output files for this function.
+ * @return Set[File] outputs for this function.
+ */
+ def outputs = getFieldFiles(outputFields)
+
+ /**
+ * Gets the files from the fields. The fields must be a File, a FileProvider, or a List or Set of either.
+ * @param fields Fields to get files.
+ * @return Set[File] for the fields.
+ */
+ private def getFieldFiles(fields: List[ArgumentSource]): Set[File] = {
+ var files = Set.empty[File]
+ for (field <- fields)
+ files ++= getFieldFiles(field)
+ files
+ }
+
+ /**
+ * Gets the files from the field. The field must be a File, a FileProvider, or a List or Set of either.
+ * @param field Field to get files from.
+ * @return Set[File] for the field.
+ */
+ def getFieldFiles(field: ArgumentSource): Set[File] = {
+ var files = Set.empty[File]
+ CollectionUtils.foreach(getFieldValue(field), (fieldValue) => {
+ val file = fieldValueToFile(field, fieldValue)
+ if (file != null)
+ files += file
+ })
+ files
+ }
+
+ /**
+ * Gets the file from the field. The field must be a File or a FileProvider and not a List or Set.
+ * @param field Field to get the file.
+ * @return File for the field.
+ */
+ def getFieldFile(field: ArgumentSource): File =
+ fieldValueToFile(field, getFieldValue(field))
+
+ /**
+ * Converts the field value to a file. The field must be a File or a FileProvider.
+ * @param field Field to get the file.
+ * @param value Value of the File or FileProvider or null.
+ * @return Null if value is null, otherwise the File.
+ * @throws QException if the value is not a File or FileProvider.
+ */
+ private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match {
+ case file: File => file
+ case fileProvider: FileProvider => fileProvider.file
+ case null => null
+ case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or implement FileProvider: %s: %s".format(field.field, unknown))
+ }
+
+ /**
+ * Resets the field to the temporary directory.
+ * @param field Field to get and set the file.
+ * @param tempDir new root for the file.
+ */
+ def resetFieldFile(field: ArgumentSource, tempDir: File): File = {
+ getFieldValue(field) match {
+ case file: File => {
+ val newFile = IOUtils.resetParent(tempDir, file)
+ setFieldValue(field, newFile)
+ newFile
+ }
+ case fileProvider: FileProvider => {
+ fileProvider.file = IOUtils.resetParent(tempDir, fileProvider.file)
+ fileProvider.file
+ }
+ case null => null
+ case unknown =>
+ throw new QException("Unable to set file from %s: %s".format(field, unknown))
+ }
+ }
+
+ /**
+ * The function description in .dot files
+ */
+ override def dotString = jobName + " => " + commandLine
+
+ /**
+ * Sets all field values and makes them canonical so that the graph can
+ * match the inputs of one function to the output of another using equals().
+ */
+ final override def freeze = {
+ freezeFieldValues
+ canonFieldValues
super.freeze
}
+ /**
+ * Sets all field values.
+ */
+ def freezeFieldValues = {
+ if (jobName == null)
+ jobName = CommandLineFunction.nextJobName(jobNamePrefix)
+
+ if (jobOutputFile == null)
+ jobOutputFile = new File(jobName + ".out")
+
+ commandDirectory = IOUtils.subDir(IOUtils.CURRENT_DIR, commandDirectory)
+ }
+
+ /**
+ * Makes all field values canonical so that the graph can match the
+ * inputs of one function to the output of another using equals().
+ */
+ def canonFieldValues = {
+ for (field <- this.functionFields) {
+ var fieldValue = this.getFieldValue(field)
+ fieldValue = CollectionUtils.updated(fieldValue, canon).asInstanceOf[AnyRef]
+ this.setFieldValue(field, fieldValue)
+ }
+ }
+
+ /**
+ * Set value to a uniform value across functions.
+ * Base implementation changes any relative path to an absolute path.
+ * @param value to be updated
+ * @return the modified value, or a copy if the value is immutable
+ */
+ protected def canon(value: Any) = {
+ value match {
+ case file: File => absolute(file)
+ case fileProvider: FileProvider => fileProvider.file = absolute(fileProvider.file); fileProvider
+ case x => x
+ }
+ }
+
+ /**
+ * Returns the absolute path to the file relative to the job command directory.
+ * @param file File to root relative to the command directory if it is not already absolute.
+ * @return The absolute path to file.
+ */
+ private def absolute(file: File) = IOUtils.subDir(commandDirectory, file)
+
/**
* Repeats parameters with a prefix/suffix if they are set otherwise returns "".
* Skips null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString.
+ * @param prefix Command line prefix per parameter.
+ * @param params Traversable parameters.
+ * @param suffix Optional suffix per parameter.
+ * @param separator Optional separator per parameter.
+ * @param format Format string if the value has a value
+ * @return The generated string
*/
- protected def repeat(prefix: String, params: Seq[_], suffix: String = "", separator: String = "") =
- params.filter(param => hasValue(param)).map(param => prefix + toValue(param) + suffix).mkString(separator)
+ protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", format: String = "%s") =
+ params.filter(param => hasValue(param)).map(param => prefix + toValue(param, format) + suffix).mkString(separator)
/**
* Returns parameter with a prefix/suffix if it is set otherwise returns "".
* Does not output null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString.
+ * @param prefix Command line prefix per parameter.
+ * @param param Parameters to check for a value.
+ * @param suffix Optional suffix per parameter.
+ * @param format Format string if the value has a value
+ * @return The generated string
*/
- protected def optional(prefix: String, param: Any, suffix: String = "") =
- if (hasValue(param)) prefix + toValue(param) + suffix else ""
+ protected def optional(prefix: String, param: Any, suffix: String = "", format: String = "%s") =
+ if (hasValue(param)) prefix + toValue(param, format) + suffix else ""
- def missingValues = {
+ /**
+ * Returns fields that do not have values which are required.
+ * @return List[String] names of fields missing values.
+ */
+ def missingFields: List[String] = {
val missingInputs = missingFields(inputFields, classOf[Input])
val missingOutputs = missingFields(outputFields, classOf[Output])
- missingInputs | missingOutputs
+ val missingArguments = missingFields(argumentFields, classOf[Argument])
+ (missingInputs | missingOutputs | missingArguments).toList.sorted
}
- private def missingFields(fields: List[Field], annotation: Class[_ <: Annotation]) = {
+ /**
+ * Returns fields that do not have values which are required.
+ * @param sources Fields to check.
+ * @param annotation Annotation.
+ * @return Set[String] names of fields missing values.
+ */
+ private def missingFields(sources: List[ArgumentSource], annotation: Class[_ <: Annotation]): Set[String] = {
var missing = Set.empty[String]
- for (field <- fields) {
- if (isRequired(field, annotation))
- if (!hasValue(ReflectionUtils.getValue(this, field)))
- missing += field.getName
+ for (source <- sources) {
+ if (isRequired(source, annotation))
+ if (!hasFieldValue(source))
+ if (!exclusiveOf(source, annotation).exists(otherSource => hasFieldValue(otherSource)))
+ missing += "@%s: %s - %s".format(annotation.getSimpleName, source.field.getName, doc(source, annotation))
}
missing
}
- private def isRequired(field: Field, annotationClass: Class[_ <: Annotation]) =
- getAnnotationValue(field.getAnnotation(annotationClass), "required").asInstanceOf[Boolean]
-
- private def getAnnotationValue(annotation: Annotation, method: String) =
- annotation.getClass.getMethod(method).invoke(annotation)
-
- protected def hasFieldValue(field: Field) = hasValue(this.getFieldValue(field))
-
- private def hasValue(param: Any) = param match {
- case null => false
- case Nil => false
- case None => false
- case _ => true
+ /**
+ * Scala sugar type for checking annotation required and exclusiveOf.
+ */
+ private type ArgumentAnnotation = {
+ /**
+ * Returns true if the field is required.
+ * @return true if the field is required.
+ */
+ def required(): Boolean
+ /**
+ * Returns the comma separated list of fields that may be set instead of this field.
+ * @return the comma separated list of fields that may be set instead of this field.
+ */
+ def exclusiveOf(): String
+ /**
+ * Returns the documentation for this field.
+ * @return the documentation for this field.
+ */
+ def doc(): String
}
- private def toValue(param: Any): String = param match {
- case null => ""
- case Nil => ""
- case None => ""
- case Some(x) => x.toString
- case x => x.toString
+ /**
+ * Returns the isRequired value from the field.
+ * @param field Field to check.
+ * @param annotation Annotation.
+ * @return the isRequired value from the field annotation.
+ */
+ private def isRequired(field: ArgumentSource, annotation: Class[_ <: Annotation]) =
+ ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].required
+
+ /**
+ * Returns an array of ArgumentSources from functionFields listed in the exclusiveOf of the original field
+ * @param field Field to check.
+ * @param annotation Annotation.
+ * @return the Array[ArgumentSource] that may be set instead of the field.
+ */
+ private def exclusiveOf(field: ArgumentSource, annotation: Class[_ <: Annotation]) =
+ ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].exclusiveOf
+ .split(",").map(_.trim).filter(_.length > 0)
+ .map(fieldName => functionFields.find(fieldName == _.field.getName) match {
+ case Some(x) => x
+ case None => throw new QException("Unable to find exclusion field %s on %s".format(fieldName, this.getClass.getSimpleName))
+ })
+
+ /**
+ * Returns the doc value from the field.
+ * @param field Field to check.
+ * @param annotation Annotation.
+ * @return the doc value from the field annotation.
+ */
+ private def doc(field: ArgumentSource, annotation: Class[_ <: Annotation]) =
+ ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].doc
+
+ /**
+ * Returns true if the field has a value.
+ * @param source Field to check for a value.
+ * @return true if the field has a value.
+ */
+ protected def hasFieldValue(source: ArgumentSource) = this.hasValue(this.getFieldValue(source))
+
+ /**
+ * Returns false if the value is null or an empty collection.
+ * @param param Value to test for null, or a collection to test if it is empty.
+ * @return false if the value is null, or false if the collection is empty, otherwise true.
+ */
+ private def hasValue(param: Any) = CollectionUtils.isNotNullOrNotEmpty(param)
+
+ /**
+ * Returns "" if the value is null or an empty collection, otherwise the value formatted with the format string.
+ * @param param Value to test for null, or a collection to test if it is empty.
+ * @param format Format string applied to the value when it is present.
+ * @return "" if the value is null or the collection is empty, otherwise the formatted value.
+ */
+ private def toValue(param: Any, format: String): String = if (CollectionUtils.isNullOrEmpty(param)) "" else
+ param match {
+ case Some(x) => format.format(x)
+ case x => format.format(x)
+ }
+
+ /**
+ * Gets the value of a field.
+ * @param source Field to get the value for.
+ * @return value of the field.
+ */
+ def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field)
+
+ /**
+ * Sets the value of a field.
+ * @param source Field to set the value for.
+ * @param value New value to assign to the field.
+ */
+ def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value)
+
+ /**
+ * Walks the fields in this object, or any collections in that object,
+ * recursively to find the object holding the field to be retrieved or set.
+ * @param source Field find the invoke object for.
+ * @return Object to invoke the field on.
+ */
+ private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _))
+}
+
+/**
+ * A command line that will be run in a pipeline.
+ */
+object CommandLineFunction {
+ /** A semi-unique job prefix using the host name and the process id. */
+ private val processNamePrefix = "Q-" + {
+ var prefix = ManagementFactory.getRuntimeMXBean.getName
+ val index = prefix.indexOf(".")
+ if (index >= 0)
+ prefix = prefix.substring(0, index)
+ prefix
+ }
+
+ /** Job index counter for this run of Queue. */
+ private var jobIndex = 0
+
+ /**
+ * Returns the next job name using the prefix.
+ * @param prefix Prefix of the job name.
+ * @return the next job name.
+ */
+ private def nextJobName(prefix: String) = {
+ jobIndex += 1
+ prefix + "-" + jobIndex
}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala
deleted file mode 100644
index eb70e31f7..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala
+++ /dev/null
@@ -1,93 +0,0 @@
-package org.broadinstitute.sting.queue.function
-
-import java.io.File
-import java.lang.management.ManagementFactory
-import org.broadinstitute.sting.queue.function.scattergather.{Gather, SimpleTextGatherFunction}
-import org.broadinstitute.sting.queue.util.IOUtils
-import org.broadinstitute.sting.commandline.{ClassType, Output, Input}
-
-trait DispatchFunction extends InputOutputFunction {
- def commandLine: String
-
- @Input(doc="Upper memory limit", required=false)
- @ClassType(classOf[Int])
- var memoryLimit: Option[Int] = None
-
- /**
- * The directory where the command should run.
- */
- @Input(doc="Directory to write any files", required=false)
- var commandDirectory: File = IOUtils.CURRENT_DIR
-
- @Input(doc="Prefix for automatic job name creation", required=false)
- var jobNamePrefix: String = _
-
- @Input(doc="Job name to run on the farm", required=false)
- var jobName: String = _
-
- @Output(doc="File to redirect any output", required=false)
- @Gather(classOf[SimpleTextGatherFunction])
- var jobOutputFile: File = _
-
- @Output(doc="File to redirect any errors", required=false)
- @Gather(classOf[SimpleTextGatherFunction])
- var jobErrorFile: File = _
-
- @Input(doc="Job project to run the command", required=false)
- var jobProject = "Queue"
-
- @Input(doc="Job queue to run the command", required=false)
- var jobQueue = "broad"
-
- override def freeze = {
- if (jobNamePrefix == null)
- jobNamePrefix = DispatchFunction.processNamePrefix
-
- if (jobName == null)
- jobName = DispatchFunction.nextJobName(jobNamePrefix)
-
- if (jobOutputFile == null)
- jobOutputFile = new File(jobName + ".out")
-
- if (jobErrorFile == null)
- jobErrorFile = new File(jobName + ".err")
-
- commandDirectory = IOUtils.absolute(IOUtils.CURRENT_DIR, commandDirectory)
-
- super.freeze
- }
-
- override def dotString = jobName + " => " + commandLine
-
- /**
- * Override the canon function to change any relative path to an absolute path.
- */
- override protected def canon(value: Any) = {
- value match {
- case file: File => IOUtils.absolute(commandDirectory, file)
- case x => super.canon(x)
- }
- }
-
- def absolute(file: File) = IOUtils.absolute(commandDirectory, file)
- def temp(subDir: String) = IOUtils.sub(commandDirectory, jobName + "-" + subDir)
-
- override def toString = commandLine
-}
-
-object DispatchFunction {
- private val processNamePrefix = "Q-" + {
- var prefix = ManagementFactory.getRuntimeMXBean.getName
- val index = prefix.indexOf(".")
- if (index >= 0)
- prefix = prefix.substring(0, index)
- prefix
- }
-
- private var jobIndex = 0
-
- private def nextJobName(prefix: String) = {
- jobIndex += 1
- prefix + "-" + jobIndex
- }
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala
index 6bcafa87a..83e1557ea 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala
@@ -2,10 +2,14 @@ package org.broadinstitute.sting.queue.function
import java.io.File
+/** An internal class that is used by bsub to wait on all other jobs before exiting. */
class DispatchWaitFunction extends CommandLineFunction {
+ /**
+ * Returns the command line "echo".
+ * @return echo
+ */
def commandLine = "echo"
jobQueue = "short"
jobOutputFile = File.createTempFile("Q-wait", ".out")
- jobErrorFile = File.createTempFile("Q-wait", ".err")
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala b/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala
new file mode 100644
index 000000000..b139cfff6
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala
@@ -0,0 +1,11 @@
+package org.broadinstitute.sting.queue.function
+
+import java.io.File
+
+/**
+ * A trait for @Input or @Output CommandLineFunction fields that are not files, but have a File that can be get/set.
+ */
+trait FileProvider {
+ /** Gets/Sets the file. */
+ var file: File
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala
deleted file mode 100644
index 5d686437b..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.broadinstitute.sting.queue.function
-
-import java.lang.reflect.Field
-import org.broadinstitute.sting.queue.util._
-import org.broadinstitute.sting.commandline.{Input, Output}
-
-/**
- * A function with @Inputs and @Outputs tagging fields that can be set by the user in a QScript
- */
-trait InputOutputFunction extends QFunction with Cloneable {
- def getFieldValue(field: Field) = ReflectionUtils.getValue(this, field)
- def setFieldValue(field: Field, value: Any) = ReflectionUtils.setValue(this, field, value)
-
- def functionFields: List[Field] = inputFields ::: outputFields
- def inputFields = ReflectionUtils.filterFields(fields, classOf[Input])
- def outputFields = ReflectionUtils.filterFields(fields, classOf[Output])
-
- private lazy val fields = ReflectionUtils.getAllFields(this.getClass)
- // TODO: Need to handle argument collections where field is not on THIS
- def inputs = CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, inputFields)).toSet
- def outputs = CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, outputFields)).toSet
-
- /**
- * Sets a field value using the name of the field.
- * Field must be annotated with @Input or @Output
- * @return true if the value was found and set
- */
- protected def addOrUpdateWithStringValue(name: String, value: String) = {
- fields.find(_.getName == name) match {
- case Some(field) =>
- val isInput = ReflectionUtils.hasAnnotation(field, classOf[Input])
- val isOutput = ReflectionUtils.hasAnnotation(field, classOf[Output])
- if (isInput || isOutput) {
- ReflectionUtils.addOrUpdateWithStringValue(this, field, value)
- }
- true
- // TODO: Need to handle argument collections where field is not on THIS
- case None => false
- }
- }
-
- def cloneFunction() = clone.asInstanceOf[this.type]
- // explicitly overriden so that trait function cloneFunction can use this.clone
- override protected def clone = super.clone
-
- /**
- * As the function is frozen, changes all fields to their canonical forms.
- */
- override def freeze = {
- for (field <- this.functionFields)
- mapField(field, canon)
- super.freeze
- }
-
- def mapField(field: Field, f: Any => Any): Any = {
- var fieldValue = this.getFieldValue(field)
- fieldValue = CollectionUtils.updated(fieldValue, f).asInstanceOf[AnyRef]
- this.setFieldValue(field, fieldValue)
- fieldValue
- }
-
- /**
- * Set value to a uniform value across functions.
- * The biggest example is file paths relative to the command directory in DispatchFunction
- */
- protected def canon(value: Any): Any = value
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala
deleted file mode 100644
index e525f58b6..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala
+++ /dev/null
@@ -1,8 +0,0 @@
-package org.broadinstitute.sting.queue.function
-
-import java.io.File
-
-trait IntervalFunction extends InputOutputFunction {
- var referenceFile: File
- var intervals: File
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala
new file mode 100644
index 000000000..29d5d3ca7
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala
@@ -0,0 +1,15 @@
+package org.broadinstitute.sting.queue.function
+
+import org.broadinstitute.sting.commandline.Argument
+import java.io.File
+
+/**
+ * Defines a command line function that runs from a jar file.
+ */
+trait JarCommandLineFunction extends CommandLineFunction {
+ @Argument(doc="jar")
+ var jarFile: File = _
+
+ def commandLine = "java%s -Djava.io.tmpdir=%s -jar %s"
+ .format(optional(" -Xmx", memoryLimit, "g"), jobTempDir, jarFile)
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala
index cd4b14246..a1d28df21 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala
@@ -1,9 +1,15 @@
package org.broadinstitute.sting.queue.function
+import java.io.File
+
/**
* Utility class to map a set of inputs to set of outputs.
* The QGraph uses this function internally to map between user defined functions.
*/
-class MappingFunction(val inputs: Set[Any], val outputs: Set[Any]) extends QFunction {
- override def toString = "" // For debugging
+class MappingFunction(val inputs: Set[File], val outputs: Set[File]) extends QFunction {
+ /**
+ * For debugging purposes returns an empty string.
+ * @return the empty string ""
+ */
+ override def toString = ""
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
index 491e4c887..68a4bf4bc 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
@@ -1,5 +1,7 @@
package org.broadinstitute.sting.queue.function
+import java.io.File
+
/**
* The base interface for all functions in Queue.
* Inputs and outputs are specified as Sets of values.
@@ -16,12 +18,15 @@ trait QFunction {
/**
* Set of inputs for this function.
*/
- def inputs: Set[Any]
+ def inputs: Set[File]
/**
* Set of outputs for this function.
*/
- def outputs: Set[Any]
+ def outputs: Set[File]
+ /**
+ * The function description in .dot files
+ */
def dotString = ""
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala
deleted file mode 100644
index b509fceb9..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.broadinstitute.sting.queue.function.gatk
-
-import java.io.File
-import org.broadinstitute.sting.queue.function.IntervalFunction
-import org.broadinstitute.sting.queue.function.scattergather.{Scatter, ScatterGatherableFunction, IntervalScatterFunction}
-import org.broadinstitute.sting.commandline.{ClassType, Input}
-import org.apache.log4j.Level
-
-trait GatkFunction extends ScatterGatherableFunction with IntervalFunction {
- @Input(doc="Temporary directory to write any files", required=false)
- var javaTmpDir: String = _
-
- @Input(doc="GATK jar")
- var gatkJar: String = _
-
- @Input(doc="Reference fasta")
- var referenceFile: File = _
-
- @Input(doc="Bam files", required=false)
- @ClassType(classOf[File])
- var bamFiles: List[File] = Nil
-
- @Input(doc="Intervals", required=false)
- @Scatter(classOf[IntervalScatterFunction])
- var intervals: File = _
-
- @Input(doc="DBSNP", required=false)
- var dbsnp: File = _
-
- @Input(doc="Logging level", required=false)
- var gatkLoggingLevel: String = _
-
- protected def gatkCommandLine(walker: String) =
- "java%s%s -jar %s -T %s -R %s%s%s%s%s "
- .format(optional(" -Xmx", memoryLimit, "g"), optional(" -Djava.io.tmpdir=", javaTmpDir),
- gatkJar, walker, referenceFile, repeat(" -I ", bamFiles), optional(" -l ", gatkLoggingLevel),
- optional(" -D ", dbsnp), optional(" -L ", intervals))
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala
deleted file mode 100644
index b433ee10e..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala
+++ /dev/null
@@ -1,17 +0,0 @@
-package org.broadinstitute.sting.queue.function.scattergather
-
-import java.io.File
-import org.broadinstitute.sting.commandline.Input
-
-class BamGatherFunction extends GatherFunction {
- type GatherType = File
-
- @Input(doc="Picard MergeSamFiles.jar. At the Broad this can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the broad see http://picard.sourceforge.net/")
- var picardMergeSamFilesJar: String = _
-
- @Input(doc="Compression level 1-9", required=false)
- var picardMergeCompressionLevel: Option[Int] = None
-
- def commandLine = "java -jar %s%s%s%s".format(picardMergeSamFilesJar,
- optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala
index a3ebc953b..cd6b9bf38 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala
@@ -1,15 +1,24 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
-import org.broadinstitute.sting.commandline.Input
import java.io.File
+import org.broadinstitute.sting.commandline.{Argument, Input}
+/**
+ * Removes the temporary directories for scatter / gather.
+ * The script can be changed by setting rmdirScript.
+ * By default uses rm -rf.
+ * The format of the call is: &lt;rmdirScript&gt; &lt;tempDir&gt; [&lt;tempDir&gt; ..] — reconstructed; original bracketed text was stripped, TODO confirm against upstream.
+ */
class CleanupTempDirsFunction extends CommandLineFunction {
@Input(doc="Original outputs of the gather functions")
- var originalOutputs: Set[Any] = Set.empty[Any]
+ var originalOutputs: Set[File] = Set.empty[File]
@Input(doc="Temporary directories to be deleted")
var tempDirectories: List[File] = Nil
- def commandLine = "rm -rf%s".format(repeat(" '", tempDirectories, "'"))
+ @Argument(doc="rmdir script or command")
+ var rmdirScript = "rm -rf"
+
+ def commandLine = "%s%s".format(rmdirScript, repeat(" '", tempDirectories, "'"))
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala
deleted file mode 100755
index 613c17e35..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-package org.broadinstitute.sting.queue.function.scattergather
-
-import java.io.File
-import org.broadinstitute.sting.commandline.Input
-import org.broadinstitute.sting.queue.function.IntervalFunction
-
-class ContigScatterFunction extends ScatterFunction {
- type ScatterType = File
-
- @Input(doc="Reference file to scatter")
- var referenceFile: File = _
-
- override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {
- val command = originalFunction.asInstanceOf[IntervalFunction]
- referenceFile = command.referenceFile
- super.setOriginalFunction(originalFunction)
- }
-
- // TODO: Use the reference file for "all"
- def commandLine = "splitIntervalsByContig.py %s%s".format(originalInput, repeat(" ", scatterParts))
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala
index de1a16652..67161169b 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala
@@ -2,25 +2,28 @@ package org.broadinstitute.sting.queue.function.scattergather
import java.io.File
import org.broadinstitute.sting.queue.function.CommandLineFunction
-import org.broadinstitute.sting.commandline.{Output, Input}
+import org.broadinstitute.sting.commandline.{Argument, Output, Input}
+/**
+ * Creates the temporary directories for scatter / gather.
+ * The script can be changed by setting mkdirScript.
+ * By default uses mkdir -pv
+ * The format of the call is: &lt;mkdirScript&gt; &lt;tempDir&gt; [&lt;tempDir&gt; ..] — reconstructed; original bracketed text was stripped, TODO confirm against upstream.
+ */
class CreateTempDirsFunction extends CommandLineFunction {
@Input(doc="Original inputs to the scattered function")
- var originalInputs: Set[Any] = Set.empty[Any]
+ var originalInputs: Set[File] = Set.empty[File]
@Output(doc="Temporary directories to create")
var tempDirectories: List[File] = Nil
- @Input(doc="Sleep seconds", required=false)
- var mkdirSleepSeconds: Option[Int] = None
+ @Argument(doc="mkdir script or command")
+ var mkdirScript = "mkdir -pv"
- // TODO: After port of LSF submitter use -cwd instead of trying to run from the directory
- // For now, create the directory so that BroadCore can run bsub from it -kshakir July 27, 2010 on chartl's computer
+ def commandLine = "%s%s".format(mkdirScript, repeat(" '", tempDirectories, "'"))
- override def freeze = {
- super.freeze
- tempDirectories.foreach(_.mkdirs)
- }
-
- def commandLine = "mkdir -pv%s%s".format(repeat(" '", tempDirectories, "'"), optional(" && sleep ", mkdirSleepSeconds))
+ /**
+ * This function creates the directories itself, so it returns only the command directory.
+ */
+ override def jobDirectories = Set(commandDirectory)
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala
deleted file mode 100644
index 6a36236ce..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala
+++ /dev/null
@@ -1,17 +0,0 @@
-package org.broadinstitute.sting.queue.function.scattergather
-
-import java.io.File
-import org.broadinstitute.sting.commandline.Input
-
-class FixMatesGatherFunction extends GatherFunction {
- type GatherType = File
-
- @Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/")
- var picardFixMatesJar: String = _
-
- @Input(doc="Compression level 1-9", required=false)
- var picardMergeCompressionLevel: Option[Int] = None
-
- def commandLine = "java -Djava.io.tmpdir=/broad/shptmp/queue -jar %s%s%s%s".format(picardFixMatesJar,
- optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts))
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala
index 3bced51ee..f5886865a 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala
@@ -1,20 +1,31 @@
package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.{CommandLineFunction}
-import org.broadinstitute.sting.commandline.{Input, Output}
+import java.io.File
+import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output}
/**
* Base class for Gather command line functions.
- * NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits.
*/
-abstract class GatherFunction extends CommandLineFunction {
- type GatherType
-
+trait GatherFunction extends CommandLineFunction {
@Input(doc="Parts to gather back into the original output")
- var gatherParts: List[GatherType] = Nil
+ var gatherParts: List[File] = Nil
@Output(doc="The original output of the scattered function")
- var originalOutput: GatherType = _
+ var originalOutput: File = _
- def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {}
+ /**
+ * Sets the original function used to create this scatter function.
+ * @param originalFunction The ScatterGatherableFunction.
+ * @param gatherField The field being gathered.
+ */
+ def setOriginalFunction(originalFunction: ScatterGatherableFunction, gatherField: ArgumentSource) = {}
+
+ /**
+ * Sets the clone function creating one of the inputs for this gather function.
+ * @param cloneFunction The clone of the ScatterGatherableFunction.
+ * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
+ * @param gatherField The field to be gathered.
+ */
+ def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, gatherField: ArgumentSource) = {}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala
deleted file mode 100644
index 8408622c0..000000000
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-package org.broadinstitute.sting.queue.function.scattergather
-
-import java.io.File
-import org.broadinstitute.sting.commandline.Input
-import org.broadinstitute.sting.queue.function.IntervalFunction
-
-class IntervalScatterFunction extends ScatterFunction {
- type ScatterType = File
-
- @Input(doc="Reference file to scatter")
- var referenceFile: File = _
-
- override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {
- val command = originalFunction.asInstanceOf[IntervalFunction]
- referenceFile = command.referenceFile
- super.setOriginalFunction(originalFunction)
- }
-
- // TODO: Use the reference file for "all"
- def commandLine = "splitIntervals.sh %s%s".format(originalInput, repeat(" ", scatterParts))
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala
index 05320ccb8..b0a8ab794 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala
@@ -2,23 +2,33 @@ package org.broadinstitute.sting.queue.function.scattergather
import org.broadinstitute.sting.queue.function.CommandLineFunction
import java.io.File
-import org.broadinstitute.sting.commandline.{Input, Output}
+import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output}
/**
* Base class for Scatter command line functions.
- * NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits.
*/
-abstract class ScatterFunction extends CommandLineFunction {
- type ScatterType
-
+trait ScatterFunction extends CommandLineFunction {
@Input(doc="Original input to scatter")
- var originalInput: ScatterType = _
+ var originalInput: File = _
+
+ @Output(doc="Scattered parts of the original input, one per temp directory")
+ var scatterParts: List[File] = Nil
@Input(doc="Temporary directories for each scatter part")
var tempDirectories: List[File] = Nil
- @Output(doc="Scattered parts of the original input, one per temp directory")
- var scatterParts: List[ScatterType] = Nil
+ /**
+ * Sets the original function used to create this scatter function.
+ * @param originalFunction The ScatterGatherableFunction.
+ * @param scatterField The field being scattered.
+ */
+ def setOriginalFunction(originalFunction: ScatterGatherableFunction, scatterField: ArgumentSource) = {}
- def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {}
+ /**
+ * Sets the clone function using one of the outputs of this scatter function.
+ * @param cloneFunction The clone of the ScatterGatherableFunction.
+ * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
+ * @param scatterField The field being scattered.
+ */
+ def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, scatterField: ArgumentSource) = {}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
index a70263dd0..aa54f5672 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
@@ -1,141 +1,367 @@
package org.broadinstitute.sting.queue.function.scattergather
-import org.broadinstitute.sting.queue.function.CommandLineFunction
-import java.lang.reflect.Field
import java.io.File
import org.broadinstitute.sting.queue.util._
-import org.broadinstitute.sting.commandline.Input
+import org.broadinstitute.sting.commandline.ArgumentSource
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+import com.rits.cloning.Cloner
+/**
+ * A function that can be run faster by splitting it up into pieces and then joining together the results.
+ */
trait ScatterGatherableFunction extends CommandLineFunction {
- @Input(doc="Number of parts to scatter the function into")
+ /** Number of parts to scatter the function into. */
var scatterCount: Int = 1
- def scatterField = this.inputFields.find(field => ReflectionUtils.hasAnnotation(field, classOf[Scatter])).get
+ /** scatter gather directory */
+ var scatterGatherDirectory: File = _
- def scatterGatherable = {
- if (scatterCount < 2)
- false
- else if (!hasFieldValue(scatterField))
- false
- else
- true
- }
+ /** cleanup temporary directories */
+ var cleanupTempDirectories = false
- def generateFunctions() = ScatterGatherableFunction.generateFunctions(this)
-}
+ /** Class to use for creating temporary directories. Defaults to CreateTempDirsFunction. */
+ var createTempDirsClass: Class[_ <: CreateTempDirsFunction] = _
-object ScatterGatherableFunction {
- private def generateFunctions(originalFunction: ScatterGatherableFunction) = {
+ /** Class to use for scattering. Defaults to the annotation used in the @Scatter tag. */
+ var scatterClass: Class[_ <: ScatterFunction] = _
+
+ /**
+ * Function that returns the class to use for gathering an output field. If it returns null then @Gather annotation will be used.
+ * @param gatherField Field that is to be gathered.
+ * @return The class of the GatherFunction to be used or null.
+ */
+ var gatherClass: PartialFunction[ArgumentSource, Class[_ <: GatherFunction]] = _
+
+ /** Class to use for removing temporary directories. Defaults to CleanupTempDirsFunction. */
+ var cleanupTempDirsClass: Class[_ <: CleanupTempDirsFunction] = _
+
+ /**
+ * Allows external modification of the CreateTempDirsFunction that will create the temporary directories.
+ * @param initializeFunction The function that will create the temporary directories.
+ * @param inputFields The input fields that the original function was dependent on.
+ */
+ var setupInitializeFunction: PartialFunction[(CreateTempDirsFunction, List[ArgumentSource]), Unit] = _
+
+ /**
+ * Allows external modification of the ScatterFunction that will create the scatter pieces in the temporary directories.
+ * @param scatterFunction The function that will create the scatter pieces in the temporary directories.
+ * @param scatterField The input field being scattered.
+ */
+ var setupScatterFunction: PartialFunction[(ScatterFunction, ArgumentSource), Unit] = _
+
+ /**
+ * Allows external modification of the GatherFunction that will collect the gather pieces in the temporary directories.
+ * @param gatherFunction The function that will merge the gather pieces from the temporary directories.
+ * @param gatherField The output field being gathered.
+ */
+ var setupGatherFunction: PartialFunction[(GatherFunction, ArgumentSource), Unit] = _
+
+ /**
+ * Allows external modification of the cloned function.
+ * @param cloneFunction The clone of this ScatterGatherableFunction
+ * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
+ */
+ var setupCloneFunction: PartialFunction[(ScatterGatherableFunction, Int), Unit] = _
+
+ /**
+ * Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories.
+ * @param cleanupFunction The function that will remove the temporary directories.
+ * @param gatherFunctions The functions that will gather up the original output fields.
+ * @param outputFields The output fields that the original function was dependent on.
+ */
+ var setupCleanupFunction: PartialFunction[(CleanupTempDirsFunction, Map[ArgumentSource, GatherFunction], List[ArgumentSource]), Unit] = _
+
+ /**
+ * Returns true if the function is ready to be scatter / gathered.
+ * The base implementation checks if the scatter count is greater than one,
+ * and that the scatter field has a value.
+ * @return true if the function is ready to be scatter / gathered.
+ */
+ def scatterGatherable = this.scatterCount > 1 && hasFieldValue(this.scatterField)
+
+ /**
+ * Returns a list of scatter / gather and clones of this function
+ * that can be run in parallel to produce the same output as this
+ * command line function.
+ * @return List[CommandLineFunction] to run instead of this function.
+ */
+ def generateFunctions() = {
var functions = List.empty[CommandLineFunction]
var tempDirectories = List.empty[File]
- // Create a function that will remove any temporary items
- var cleanupFunction = new CleanupTempDirsFunction
- cleanupFunction.properties = originalFunction.properties
- cleanupFunction.jobNamePrefix = originalFunction.jobNamePrefix
- cleanupFunction.commandDirectory = originalFunction.commandDirectory
-
- // Find the field with @Scatter and its value
- var scatterField = originalFunction.scatterField
- val originalValue = originalFunction.getFieldValue(scatterField)
+ // Only depend on input fields that have a value
+ val inputFieldsWithValues = this.inputFields.filter(hasFieldValue(_))
+ // Only gather up fields that will have a value
+ val outputFieldsWithValues = this.outputFields.filter(hasFieldValue(_))
// Create the scatter function based on @Scatter
- val scatterFunction = getScatterFunction(scatterField)
- scatterFunction.setOriginalFunction(originalFunction)
- scatterFunction.properties = originalFunction.properties
- scatterFunction.jobNamePrefix = originalFunction.jobNamePrefix
- scatterFunction.commandDirectory = originalFunction.temp("scatter-" + scatterField.getName)
- scatterFunction.originalInput = originalValue.asInstanceOf[scatterFunction.ScatterType]
+ val scatterFunction = this.newScatterFunction(this.scatterField)
+ initScatterFunction(scatterFunction, this.scatterField)
tempDirectories :+= scatterFunction.commandDirectory
functions :+= scatterFunction
// Create the gather functions for each output field
- var gatherFunctions = Map.empty[Field, GatherFunction]
- for (outputField <- originalFunction.outputFieldsWithValues) {
-
- // Create the gather function based on @Gather
- val gatherFunction = getGatherFunction(outputField)
- gatherFunction.setOriginalFunction(originalFunction)
- gatherFunction.properties = originalFunction.properties
- gatherFunction.jobNamePrefix = originalFunction.jobNamePrefix
- gatherFunction.commandDirectory = originalFunction.temp("gather-" + outputField.getName)
-
- val gatheredValue = originalFunction.getFieldValue(outputField).asInstanceOf[gatherFunction.GatherType]
- gatherFunction.originalOutput = gatheredValue
-
+ var gatherFunctions = Map.empty[ArgumentSource, GatherFunction]
+ for (gatherField <- outputFieldsWithValues) {
+ val gatherFunction = this.newGatherFunction(gatherField)
+ initGatherFunction(gatherFunction, gatherField)
tempDirectories :+= gatherFunction.commandDirectory
- cleanupFunction.originalOutputs += gatheredValue
-
functions :+= gatherFunction
-
- gatherFunctions += outputField -> gatherFunction
+ gatherFunctions += gatherField -> gatherFunction
}
// Create the clone functions for running the parallel jobs
var cloneFunctions = List.empty[CommandLineFunction]
- for (i <- 1 to originalFunction.scatterCount) {
- val cloneFunction = newFunctionClone(originalFunction)
+ for (i <- 1 to this.scatterCount) {
+ val cloneFunction = this.newCloneFunction()
+ initCloneFunction(cloneFunction, i)
cloneFunctions :+= cloneFunction
+ tempDirectories :+= cloneFunction.commandDirectory
- val tempDir = originalFunction.temp("temp-"+i)
- cloneFunction.commandDirectory = tempDir
- tempDirectories :+= tempDir
-
- // Reset the input of the clone to the the temp dir and add it as an output of the scatter
- var scatterPart = CollectionUtils.updated(originalValue, resetToTempDir(tempDir))
- scatterFunction.scatterParts :+= scatterPart.asInstanceOf[scatterFunction.ScatterType]
- cloneFunction.setFieldValue(scatterField, scatterPart)
-
- // For each each output field, change value to the temp dir and feed it into the gatherer
- for (outputField <- originalFunction.outputFields) {
- val gatherFunction = gatherFunctions(outputField)
- val gatherPart = cloneFunction.mapField(outputField, resetToTempDir(tempDir))
- gatherFunction.gatherParts :+= gatherPart.asInstanceOf[gatherFunction.GatherType]
- }
+ bindCloneFunctionScatter(scatterFunction, this.scatterField, cloneFunction, i)
+ // For each output field, change value to the scatterGatherTempDir dir and feed it into the gatherer
+ for (gatherField <- outputFieldsWithValues)
+ bindCloneFunctionGather(gatherFunctions(gatherField), gatherField, cloneFunction, i)
}
- functions = cloneFunctions ::: functions
+ functions ++= cloneFunctions
- // Create a function to create all of the temp directories.
+ // Create a function to create all of the scatterGatherTempDir directories.
// All of its inputs are the inputs of the original function.
- val initializeFunction = new CreateTempDirsFunction
- initializeFunction.properties = originalFunction.properties
- initializeFunction.jobNamePrefix = originalFunction.jobNamePrefix
- initializeFunction.commandDirectory = originalFunction.commandDirectory
+ val initializeFunction = this.newInitializeFunction()
+ initInitializeFunction(initializeFunction, inputFieldsWithValues)
- for (inputField <- originalFunction.inputFieldsWithValues)
- initializeFunction.originalInputs += originalFunction.getFieldValue(inputField)
+ // Create a function that will remove any temporary items
+ // All of its inputs are the outputs of the original function.
+ var cleanupFunction = newCleanupFunction()
+ initCleanupFunction(cleanupFunction, gatherFunctions, outputFieldsWithValues)
+ // Set the temporary directories, for the initialize function as outputs for scatter and cleanup as inputs.
initializeFunction.tempDirectories = tempDirectories
scatterFunction.tempDirectories = tempDirectories
cleanupFunction.tempDirectories = tempDirectories
functions +:= initializeFunction
- functions :+= cleanupFunction
+ if (this.cleanupTempDirectories)
+ functions :+= cleanupFunction
// Return all the various functions we created
functions
}
- private def resetToTempDir(tempDir: File): Any => Any = {
- (any: Any) => {
- any match {
- case file: File => IOUtils.reset(tempDir, file)
- case x => x
- }
- }
+ /**
+ * Sets the scatter gather directory to the command directory if it is not already set.
+ */
+ override def freezeFieldValues = {
+ super.freezeFieldValues
+ if (this.scatterGatherDirectory == null)
+ this.scatterGatherDirectory = this.commandDirectory
}
- private def getScatterFunction(inputField: Field) =
- ReflectionUtils.getAnnotation(inputField, classOf[Scatter]).value.newInstance.asInstanceOf[ScatterFunction]
+ /**
+ * Retrieves the scatter field from the first field that has the annotation @Scatter.
+ */
+ protected lazy val scatterField =
+ this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get
- private def getGatherFunction(outputField: Field) =
- ReflectionUtils.getAnnotation(outputField, classOf[Gather]).value.newInstance.asInstanceOf[GatherFunction]
+ /**
+ * Creates a new initialize CreateTempDirsFunction that will create the temporary directories.
+ * @return A CreateTempDirsFunction that will create the temporary directories.
+ */
+ protected def newInitializeFunction(): CreateTempDirsFunction = {
+ if (createTempDirsClass != null)
+ this.createTempDirsClass.newInstance
+ else
+ new CreateTempDirsFunction
+ }
- private def newFunctionClone(originalFunction: ScatterGatherableFunction) = {
- val cloneFunction = originalFunction.cloneFunction.asInstanceOf[ScatterGatherableFunction]
+ /**
+ * Initializes the CreateTempDirsFunction that will create the temporary directories.
+ * The initializeFunction jobNamePrefix is set so that the CreateTempDirsFunction runs with the same prefix as this ScatterGatherableFunction.
+ * The initializeFunction commandDirectory is set so that the function runs in the same directory as this ScatterGatherableFunction.
+ * The initializeFunction is modified to become dependent on the input files for this ScatterGatherableFunction.
+ * Calls setupInitializeFunction with initializeFunction.
+ * @param initializeFunction The function that will create the temporary directories.
+ * @param inputFields The input fields that the original function was dependent on.
+ */
+ protected def initInitializeFunction(initializeFunction: CreateTempDirsFunction, inputFields: List[ArgumentSource]) = {
+ initializeFunction.jobNamePrefix = this.jobNamePrefix
+ initializeFunction.commandDirectory = this.commandDirectory
+ for (inputField <- inputFields)
+ initializeFunction.originalInputs ++= this.getFieldFiles(inputField)
+ if (this.setupInitializeFunction != null)
+ if (this.setupInitializeFunction.isDefinedAt(initializeFunction, inputFields))
+ this.setupInitializeFunction(initializeFunction, inputFields)
+ }
+
+ /**
+ * Creates a new ScatterFunction for the scatterField.
+ * @param scatterField Field that defined @Scatter.
+ * @return A ScatterFunction instantiated from @Scatter or scatterClass if scatterClass was set on this ScatterGatherableFunction.
+ */
+ protected def newScatterFunction(scatterField: ArgumentSource): ScatterFunction = {
+ var scatterClass = this.scatterClass
+ if (scatterClass == null)
+ scatterClass = ReflectionUtils.getAnnotation(scatterField.field, classOf[Scatter])
+ .value.asSubclass(classOf[ScatterFunction])
+ scatterClass.newInstance.asInstanceOf[ScatterFunction]
+ }
+
+ /**
+ * Initializes the ScatterFunction created by newScatterFunction() that will create the scatter pieces in the temporary directories.
+ * The scatterFunction jobNamePrefix is set so that the ScatterFunction runs with the same prefix as this ScatterGatherableFunction.
+ * The scatterFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory.
+ * The scatterFunction has its originalInput set with the file to be scattered into scatterCount pieces.
+ * Calls scatterFunction.setOriginalFunction with this ScatterGatherableFunction.
+ * Calls setupScatterFunction with scatterFunction.
+ * @param scatterFunction The function that will create the scatter pieces in the temporary directories.
+ * @param scatterField The input field being scattered.
+ */
+ protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = {
+ scatterFunction.jobNamePrefix = this.jobNamePrefix
+ scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName)
+ scatterFunction.originalInput = this.getFieldFile(scatterField)
+ scatterFunction.setOriginalFunction(this, scatterField)
+ if (this.setupScatterFunction != null)
+ if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField))
+ this.setupScatterFunction(scatterFunction, scatterField)
+ }
+
+ /**
+ * Creates a new GatherFunction for the gatherField.
+ * @param gatherField Field that defined @Gather.
+ * @return A GatherFunction instantiated from @Gather.
+ */
+ protected def newGatherFunction(gatherField: ArgumentSource) : GatherFunction = {
+ var gatherClass: Class[_ <: GatherFunction] = null
+ if (this.gatherClass != null)
+ if (this.gatherClass.isDefinedAt(gatherField))
+ gatherClass = this.gatherClass(gatherField)
+ if (gatherClass == null)
+ gatherClass = ReflectionUtils.getAnnotation(gatherField.field, classOf[Gather])
+ .value.asSubclass(classOf[GatherFunction])
+ gatherClass.newInstance.asInstanceOf[GatherFunction]
+ }
+
+ /**
+ * Initializes the GatherFunction created by newGatherFunction() that will collect the gather pieces in the temporary directories.
+ * The gatherFunction jobNamePrefix is set so that the GatherFunction runs with the same prefix as this ScatterGatherableFunction.
+ * The gatherFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory.
+ * The gatherFunction has its originalOutput set with the file to be gathered from the scatterCount pieces.
+ * Calls the gatherFunction.setOriginalFunction with this ScatterGatherableFunction.
+ * Calls setupGatherFunction with gatherFunction.
+ * @param gatherFunction The function that will merge the gather pieces from the temporary directories.
+ * @param gatherField The output field being gathered.
+ */
+ protected def initGatherFunction(gatherFunction: GatherFunction, gatherField: ArgumentSource) = {
+ gatherFunction.jobNamePrefix = this.jobNamePrefix
+ gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName)
+ gatherFunction.originalOutput = this.getFieldFile(gatherField)
+ gatherFunction.setOriginalFunction(this, gatherField)
+ if (this.setupGatherFunction != null)
+ if (this.setupGatherFunction.isDefinedAt(gatherFunction, gatherField))
+ this.setupGatherFunction(gatherFunction, gatherField)
+ }
+
+ /**
+ * Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter.
+ * @return A clone of this ScatterGatherableFunction
+ */
+ protected def newCloneFunction(): ScatterGatherableFunction = {
+ val cloneFunction = ScatterGatherableFunction.cloner.deepClone(this)
// Make sure clone doesn't get scattered
cloneFunction.scatterCount = 1
cloneFunction
}
+
+ /**
+ * Initializes the cloned function created by newCloneFunction() by setting its commandDirectory to a temporary directory under scatterDirectory.
+ * Calls setupCloneFunction with cloneFunction.
+ * @param cloneFunction The clone of this ScatterGatherableFunction
+ * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
+ */
+ protected def initCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int) = {
+ cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index)
+ if (this.setupCloneFunction != null)
+ if (this.setupCloneFunction.isDefinedAt(cloneFunction, index))
+ this.setupCloneFunction(cloneFunction, index)
+ }
+
+ /**
+ * Joins a piece of the ScatterFunction output to the cloned function's input.
+ * The input of the clone is changed to be in the output directory of the clone.
+ * The scatter function piece is added as an output of the scatterFunction.
+ * The clone function's original input is changed to use the piece from the output directory.
+ * Finally the scatterFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction.
+ * @param scatterFunction Function that will create the pieces including the piece that will go to cloneFunction.
+ * @param scatterField The field to be scattered.
+ * @param cloneFunction Clone of this ScatterGatherableFunction.
+ * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
+ */
+ protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
+ // Reset the input of the clone to the scatterGatherTempDir dir and add it as an output of the scatter
+ val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput)
+ scatterFunction.scatterParts :+= scatterPart
+ cloneFunction.setFieldValue(scatterField, scatterPart)
+ scatterFunction.setCloneFunction(cloneFunction, index, scatterField)
+ }
+
+ /**
+ * Joins the cloned function's output as a piece of the GatherFunction's input.
+ * Finally the gatherFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction.
+ * @param cloneFunction Clone of this ScatterGatherableFunction.
+ * @param gatherFunction Function that will merge the pieces including the piece produced by cloneFunction.
+ * @param gatherField The field to be gathered.
+ */
+ protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = {
+ val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory)
+ gatherFunction.gatherParts :+= gatherPart
+ gatherFunction.setCloneFunction(cloneFunction, index, gatherField)
+ }
+
+ /**
+ * Creates a new function that will remove the temporary directories.
+ * @return A CleanupTempDirs function that will remove the temporary directories.
+ */
+ protected def newCleanupFunction(): CleanupTempDirsFunction = {
+ if (cleanupTempDirsClass != null)
+ this.cleanupTempDirsClass.newInstance
+ else
+ new CleanupTempDirsFunction
+ }
+
+ /**
+ * Initializes the CleanupTempDirsFunction created by newCleanupFunction() that will remove the temporary directories.
+ * The cleanupFunction jobNamePrefix is set so that the CleanupTempDirsFunction runs with the same prefix as this ScatterGatherableFunction.
+ * The cleanupFunction commandDirectory is set so that the function runs in the same directory as this ScatterGatherableFunction.
+ * The cleanupFunction is modified to become dependent on the output files for this ScatterGatherableFunction.
+ * Calls setupCleanupFunction with cleanupFunction.
+ * @param cleanupFunction The function that will remove the temporary directories.
+ * @param gatherFunctions The functions that will gather up the original output fields.
+ * @param outputFields The output fields that the original function was dependent on.
+ */
+ protected def initCleanupFunction(cleanupFunction: CleanupTempDirsFunction, gatherFunctions: Map[ArgumentSource, GatherFunction], outputFields: List[ArgumentSource]) = {
+ cleanupFunction.jobNamePrefix = this.jobNamePrefix
+ cleanupFunction.commandDirectory = this.commandDirectory
+ for (gatherField <- outputFields)
+ cleanupFunction.originalOutputs += gatherFunctions(gatherField).originalOutput
+ if (this.setupCleanupFunction != null)
+ if (this.setupCleanupFunction.isDefinedAt(cleanupFunction, gatherFunctions, outputFields))
+ this.setupCleanupFunction(cleanupFunction, gatherFunctions, outputFields)
+ }
+
+ /**
+ * Returns a temporary directory under this scatter gather directory.
+ * @param subDir Sub directory under the scatter gather directory.
+ * @return temporary directory under this scatter gather directory.
+ */
+ private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-" + subDir)
+}
+
+/**
+ * A function that can be run faster by splitting it up into pieces and then joining together the results.
+ */
+object ScatterGatherableFunction {
+ /** Used to deep clone a ScatterGatherableFunction. */
+ private lazy val cloner = new Cloner
}
diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala
index 070c36115..9a5681e4d 100644
--- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala
+++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala
@@ -1,10 +1,16 @@
package org.broadinstitute.sting.queue.function.scattergather
-import java.io.File
+import org.broadinstitute.sting.commandline.Argument
+/**
+ * Merges a text file.
+ * The script can be changed by setting mergeTextScript.
+ * By default uses mergeText.sh in Sting/shell.
+ * The format of the call is: mergeTextScript originalOutput gatherPart1 [gatherPart2 ...]
+ */
class SimpleTextGatherFunction extends GatherFunction {
- type GatherType = File
+ @Argument(doc="merge text script")
+ var mergeTextScript = "mergeText.sh"
- // TODO: Write a text merging utility that takes into account headers.
- def commandLine = "mergeText.sh %s%s".format(originalOutput, repeat(" ", gatherParts))
+ def commandLine = "%s %s%s".format(mergeTextScript, originalOutput, repeat(" ", gatherParts))
}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala
index f3a0f43b5..36bc97a7c 100755
--- a/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala
@@ -4,14 +4,32 @@ import collection.JavaConversions._
import org.reflections.util.ManifestAwareClasspathHelper
import java.io.File
import javax.print.URIException
+import java.net.{URL, URLClassLoader}
/**
* Builds the correct class path by examining the manifests
*/
object ClasspathUtils {
+
+ /**
+ * Returns a list of files that build up the classpath, taking into account jar file manifests.
+ * @return List[File] that build up the current classpath.
+ */
def manifestAwareClassPath = {
var urls = ManifestAwareClasspathHelper.getUrlsForManifestCurrentClasspath
- var files = urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)})
- files.mkString(File.pathSeparator)
+ urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)})
+ }
+
+ /**
+ * Adds the directory to the system class loader classpath using reflection.
+ * HACK: Uses reflection to modify the class path, and assumes loader is a URLClassLoader
+ * @param path Directory to add to the system class loader classpath.
+ */
+ def addClasspath(path: File): Unit = {
+ val url = path.toURI.toURL
+ val method = classOf[URLClassLoader].getDeclaredMethod("addURL", classOf[URL]);
+ if (!method.isAccessible)
+ method.setAccessible(true);
+ method.invoke(ClassLoader.getSystemClassLoader(), url);
}
}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala
index b384c8dfa..6871d8f4b 100644
--- a/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala
@@ -1,18 +1,16 @@
package org.broadinstitute.sting.queue.util
/**
- * Utilities that try to deeply apply operations to collections
+ * Utilities that try to deeply apply operations to collections, specifically Traversable and Option.
*/
object CollectionUtils {
- def test(value: Any, f: Any => Boolean): Boolean = {
- var result = f(value)
- foreach(value, (item, collection) => {
- result |= f(item)
- })
- result
- }
-
+ /**
+ * Loops through a collection running the function f on each value.
+ * @param value The value to run f on, or a collection of values for which f should be run on.
+ * @param f The function to run on value, or to run on the values within the collection.
+ * @return The updated value.
+ */
def updated(value: Any, f: Any => Any): Any = {
value match {
case traversable: Traversable[_] => traversable.map(updated(_, f))
@@ -21,6 +19,11 @@ object CollectionUtils {
}
}
+ /**
+ * Utility for recursively processing collections.
+ * @param value The initial collection to be processed
+ * @param f a function that will be called for each (item, collection) in the initial collection
+ */
def foreach(value: Any, f: (Any, Any) => Unit): Unit = {
value match {
case traversable: Traversable[_] =>
@@ -37,11 +40,24 @@ object CollectionUtils {
}
}
- // Because scala allows but throws NPE when trying to hash a collection with a null in it.
- // http://thread.gmane.org/gmane.comp.lang.scala.internals/3267
- // https://lampsvn.epfl.ch/trac/scala/ticket/2935
- def removeNullOrEmpty[T](value: T): T = filterNotNullOrNotEmpty(value)
+ /**
+ * Utility for recursively processing collections.
+ * @param value The initial collection to be processed
+ * @param f a function that will be called for each (item, collection) in the initial collection
+ */
+ def foreach(value: Any, f: (Any) => Unit): Unit = {
+ value match {
+ case traversable: Traversable[_] => traversable.foreach(f(_))
+ case option: Option[_] => option.foreach(f(_))
+ case item => f(item)
+ }
+ }
+ /**
+ * Removes empty values from collections.
+ * @param value The collection to test.
+ * @return The value if it is not a collection, otherwise the collection with nulls and empties removed.
+ */
private def filterNotNullOrNotEmpty[T](value: T): T = {
val newValue = value match {
case traversable: Traversable[_] => traversable.map(filterNotNullOrNotEmpty(_)).filter(isNotNullOrNotEmpty(_)).asInstanceOf[T]
@@ -51,7 +67,20 @@ object CollectionUtils {
newValue
}
- private def isNotNullOrNotEmpty(value: Any): Boolean = {
+
+ /**
+ * Returns true if the value is null or an empty collection.
+ * @param value Value to test for null, or a collection to test if it is empty.
+ * @return true if the value is null or the collection is empty, otherwise false.
+ */
+ def isNullOrEmpty(value: Any): Boolean = !isNotNullOrNotEmpty(value)
+
+ /**
+ * Returns false if the value is null or an empty collection.
+ * @param value Value to test for null, or a collection to test if it is empty.
+ * @return false if the value is null, or false if the collection is empty, otherwise true.
+ */
+ def isNotNullOrNotEmpty(value: Any): Boolean = {
val result = value match {
case traversable: Traversable[_] => !filterNotNullOrNotEmpty(traversable).isEmpty
case option: Option[_] => !filterNotNullOrNotEmpty(option).isEmpty
diff --git a/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala b/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala
new file mode 100644
index 000000000..0b322301e
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala
@@ -0,0 +1,51 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io.File
+
+/**
+ * Base class for a command line job.
+ */
+abstract class CommandLineJob {
+ var command: String = _
+ var workingDir: File = _
+ var inputFile: File = _
+ var outputFile: File = _
+ var errorFile: File = _
+
+ /**
+ * Runs the command, either immediately or dispatching it to a compute farm.
+ * If it is dispatched to a compute farm it should not start until jobs it depends on are finished.
+ */
+ def run()
+
+ /**
+ * Returns the content of a command output.
+ * @param streamOutput The output of the command.
+ * @return The content of the command, along with a message if it was truncated.
+ */
+ protected def content(streamOutput: ProcessController.StreamOutput) = {
+ var content = streamOutput.content
+ if (streamOutput.contentTruncated)
+ content += "%n%n".format()
+ content
+ }
+
+ /**
+ * Returns the ProcessController for this thread.
+ * @return The ProcessController for this thread.
+ */
+ protected def processController = CommandLineJob.threadProcessController.get
+
+ /** A five mb limit of characters for display. */
+ protected val FIVE_MB = 1024 * 512 * 5;
+}
+
+/**
+ * Base class for a command line job.
+ */
+object CommandLineJob {
+ /** Thread local process controller container. */
+ private val threadProcessController = new ThreadLocal[ProcessController] {
+ override def initialValue = new ProcessController
+ }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
index 5fa902391..4a9fa8f4a 100644
--- a/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
@@ -2,30 +2,69 @@ package org.broadinstitute.sting.queue.util
import java.io.{IOException, File}
+/**
+ * A collection of utilities for modifying java.io.
+ */
object IOUtils {
+ /** The current directory "." */
val CURRENT_DIR = new File(".")
- def sub(parent: File, subPath: String) = {
- val file = new File(subPath)
+
+ /**
+ * Returns the sub path rooted at the parent.
+ * If the sub path is already absolute, returns the sub path.
+ * If the parent is the current directory, returns the sub path.
+ * If the sub path is the current directory, returns the parent.
+ * Else returns new File(parent, subPath)
+ * @param dir The parent directory
+ * @param path The sub path to append to the parent, if the path is not absolute.
+ * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
+ */
+ def subDir(dir: File, path: String): File =
+ subDir(dir.getAbsoluteFile, new File(path))
+
+ /**
+ * Returns the sub path rooted at the parent.
+ * If the sub path is already absolute, returns the sub path.
+ * If the parent is the current directory, returns the sub path.
+ * If the sub path is the current directory, returns the parent.
+ * Else returns new File(parent, subPath)
+ * @param parent The parent directory
+ * @param file The sub path to append to the parent, if the path is not absolute.
+ * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
+ */
+ def subDir(parent: File, file: File): File = {
if (parent == CURRENT_DIR && file == CURRENT_DIR)
- CURRENT_DIR.getCanonicalFile
+ CURRENT_DIR.getCanonicalFile.getAbsoluteFile
else if (parent == CURRENT_DIR || file.isAbsolute)
- file
+ file.getAbsoluteFile
else if (file == CURRENT_DIR)
- parent
+ parent.getAbsoluteFile
else
- new File(parent, subPath)
+ new File(parent, file.getPath).getAbsoluteFile
}
- def temp(prefix: String, suffix: String = "") = {
- val tempDir = File.createTempFile(prefix + "-", suffix)
- if(!tempDir.delete)
- throw new IOException("Could not delete sub file: " + tempDir.getAbsolutePath())
- if(!tempDir.mkdir)
- throw new IOException("Could not create sub directory: " + tempDir.getAbsolutePath())
- tempDir
- }
+ /**
+ * Resets the parent of the file to the directory.
+ * @param dir New parent directory.
+ * @param file Path to the file to be re-rooted.
+ * @return Absolute path to the new file.
+ */
+ def resetParent(dir: File, file: File) = subDir(dir.getAbsoluteFile, file.getName).getAbsoluteFile
- def reset(dir: File, file: File) = sub(dir, file.getName).getAbsoluteFile
- def absolute(dir: File, file: File) = sub(dir, file.getPath).getAbsoluteFile
+ /**
+   * Creates a temporary directory with the prefix and optional suffix.
+ * @param prefix Prefix for the directory name.
+ * @param suffix Optional suffix for the directory name. Defaults to "".
+ * @return The created temporary directory.
+ * @throws IOException if the directory could not be created.
+ */
+ def tempDir(prefix: String, suffix: String = "") = {
+ val temp = File.createTempFile(prefix + "-", suffix)
+ if(!temp.delete)
+ throw new IOException("Could not delete sub file: " + temp.getAbsolutePath())
+ if(!temp.mkdir)
+ throw new IOException("Could not create sub directory: " + temp.getAbsolutePath())
+ temp
+ }
}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/Logging.scala b/scala/src/org/broadinstitute/sting/queue/util/Logging.scala
index c61a6267f..5a9fed204 100755
--- a/scala/src/org/broadinstitute/sting/queue/util/Logging.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/Logging.scala
@@ -7,25 +7,5 @@ import org.apache.log4j._
*/
trait Logging {
private val className = this.getClass.getName
- protected lazy val logger = {
- Logging.configureLogging
- Logger.getLogger(className)
- }
-}
-
-object Logging {
- private var configured = false
- private var level = Level.INFO
- def configureLogging = {
- if (!configured) {
- var root = Logger.getRootLogger
- root.addAppender(new ConsoleAppender(new PatternLayout("%-5p %d{HH:mm:ss,SSS} - %m %n")))
- root.setLevel(level)
- configured = true
- }
- }
-
- def setDebug = setLevel(Level.DEBUG)
- def setTrace = setLevel(Level.TRACE)
- private def setLevel(level: Level) = {this.level = level; Logger.getRootLogger.setLevel(level)}
+ protected lazy val logger = Logger.getLogger(className)
}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala b/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala
new file mode 100644
index 000000000..f18ad4304
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala
@@ -0,0 +1,142 @@
+package org.broadinstitute.sting.queue.util
+
+import java.util.regex.Pattern
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.QException
+
+/**
+ * A job submitted to LSF. This class is designed to work somewhat like
+ * java.lang.Process, but has some extensions.
+ *
+ * @author A subset of the original BroadCore ported to scala by Khalid Shakir
+ */
+class LsfJob extends CommandLineJob with Logging {
+ var name: String = _
+ var project: String = _
+ var queue: String = _
+ var preExecCommand: String = _
+ var postExecCommand: String = _
+ var waitForCompletion = false
+ var extraBsubArgs: List[String] = Nil
+ var bsubJobId: String = _
+
+ /**
+ * Starts the job. Command must exist. The job will be submitted to LSF.
+ */
+ def run() = {
+ assert(bsubJobId == null, "LSF job was already started")
+ assert(command != null, "Command was not set on LSF job")
+ assert(outputFile != null, "Output file must be set on LSF job")
+
+ // capture the output for debugging
+ val stdinSettings = new ProcessController.InputStreamSettings(null, null)
+ val stdoutSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false)
+ val stderrSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false)
+
+ // This is really nice for debugging, but spits out way too much stuff otherwise!
+ // log.info("About to execute LSF command: " + StringUtils.join(argArray, " "));
+
+ // Get environment vars and strip out LD_ASSUME_KERNEL
+    // This is necessary since GAP servers run on the linux 2.4.x kernel and can be removed when
+    // it's no longer true. Only the 'classic' LSF queue has 2.4 kernel-based machines.
+
+ // launch the bsub job from the current directory
+ val processSettings = new ProcessController.ProcessSettings(
+ bsubCommand, environmentVariables, null, stdinSettings, stdoutSettings, stderrSettings, false)
+ val bsubOutput = processController.exec(processSettings)
+
+ if (bsubOutput.exitValue != 0) {
+ logger.error("Failed to submit LSF job, got exit code %s. Standard error contained: %n%s"
+ .format(bsubOutput.exitValue, content(bsubOutput.stderr)))
+ throw new QException("Failed to submit LSF job, got exit code %s.".format(bsubOutput.exitValue))
+ }
+
+ // get the LSF job ID
+ val matcher = LsfJob.JOB_ID.matcher(bsubOutput.stdout.content)
+ matcher.find()
+ bsubJobId = matcher.group
+
+ // set job name to LSF_ if not set already
+ if (name == null)
+ name = "lsf_job_" + bsubJobId
+ }
+
+ /**
+ * Generates the bsub command line for this LsfJob.
+ * @return command line as a Array[String]
+ */
+ def bsubCommand = {
+ var args = List.empty[String]
+ args :+= "bsub"
+
+ if (name != null) {
+ args :+= "-J"
+ args :+= name
+ }
+
+ if (inputFile != null) {
+ args :+= "-i"
+ args :+= inputFile.getAbsolutePath
+ }
+
+ args :+= "-o"
+ args :+= outputFile.getAbsolutePath
+
+ if (errorFile != null) {
+ args :+= "-e"
+ args :+= errorFile.getAbsolutePath
+ }
+
+ if (queue != null) {
+ args :+= "-q"
+ args :+= queue
+ }
+
+ if (project != null) {
+ args :+= "-P"
+ args :+= project
+ }
+
+ if (preExecCommand != null) {
+ args :+= "-E"
+ args :+= preExecCommand
+ }
+
+ if (postExecCommand != null) {
+ args :+= "-Ep"
+ args :+= postExecCommand
+ }
+
+ if (workingDir != null) {
+ args :+= "-cwd"
+ args :+= workingDir.getPath
+ }
+
+ if (waitForCompletion) {
+ args :+= "-K"
+ }
+
+ args ++= extraBsubArgs
+
+ args :+= command
+
+ args.toArray
+ }
+
+ /**
+ * Get the list of environment variables and pass into the exec job. We strip
+ * out LD_ASSUME_KERNEL because it behaves badly when running bsub jobs across
+ * different versions of the linux OS.
+ *
+   * @return map of environment variable names to values, minus LD_ASSUME_KERNEL.
+ */
+ private def environmentVariables =
+ System.getenv()
+ .filterNot{case (name, value) => name.equalsIgnoreCase("LD_ASSUME_KERNEL") || value == null}
+ .toMap
+}
+
+object LsfJob {
+ /** Used to search the stdout for the job id. */
+ private val JOB_ID = Pattern.compile("\\d+")
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala
new file mode 100644
index 000000000..80162582e
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala
@@ -0,0 +1,360 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io._
+import scala.collection.mutable.{HashSet, ListMap}
+
+/**
+ * Facade to Runtime.exec() and java.lang.Process. Handles
+ * running a process to completion and returns stdout and stderr
+ * as strings. Creates separate threads for reading stdout and stderr,
+ * then reuses those threads for each process; the most efficient use is
+ * to create one of these and use it repeatedly. Instances are not
+ * thread-safe, however.
+ *
+ * @author originally by Michael Koehrsen ported to scala and enhanced by Khalid Shakir
+ */
+class ProcessController extends Logging {
+
+ // Threads that capture stdout and stderr
+ private val stdoutCapture = new OutputCapture(ProcessController.STDOUT_KEY)
+ private val stderrCapture = new OutputCapture(ProcessController.STDERR_KEY)
+
+ // Communication channels with output capture threads
+ /** Holds the stdout and stderr sent to the background capture threads */
+ private val toCapture = new ListMap[String, ProcessController.CapturedStreamOutput]
+
+ /** Holds the results of the capture from the background capture threads.
+ * May be the content via toCapture or an EmptyStreamOutput if the capture was interrupted. */
+ private val fromCapture = new ListMap[String, ProcessController.StreamOutput]
+
+ // Start the background threads for this controller.
+ stdoutCapture.start()
+ stderrCapture.start()
+
+ /**
+ * Executes a command line program with the settings and waits for it to return, processing the output on a background thread.
+ * @param settings Settings to be run.
+ * @return The output of the command.
+ */
+ def exec(settings: ProcessController.ProcessSettings): ProcessController.ProcessOutput = {
+ var builder = new ProcessBuilder(settings.cmdarray:_*)
+ builder.directory(settings.directory)
+
+ if (settings.environment != null) {
+ val builderEnvironment = builder.environment
+ builderEnvironment.clear()
+ settings.environment.foreach{case (name, value) => builderEnvironment.put(name, value)}
+ }
+
+ builder.redirectErrorStream(settings.redirectErrorStream)
+
+ var stdout: ProcessController.StreamOutput = null
+ var stderr: ProcessController.StreamOutput = null
+ val process = builder.start
+
+ ProcessController.running.add(process)
+ try {
+ val stdoutSettings = if (settings.stdoutSettings == null) ProcessController.EmptyStreamSettings else settings.stdoutSettings
+ val stderrSettings = if (settings.stderrSettings == null) ProcessController.EmptyStreamSettings else settings.stderrSettings
+
+ toCapture.synchronized {
+ toCapture.put(ProcessController.STDOUT_KEY, new ProcessController.CapturedStreamOutput(process.getInputStream, stdoutSettings))
+ toCapture.put(ProcessController.STDERR_KEY, new ProcessController.CapturedStreamOutput(process.getErrorStream, stderrSettings))
+ toCapture.notifyAll()
+ }
+
+ if (settings.stdinSettings.input != null) {
+ val writer = new OutputStreamWriter(process.getOutputStream)
+ writer.write(settings.stdinSettings.input)
+ writer.flush()
+ }
+ if (settings.stdinSettings.inputFile != null) {
+ val reader = new FileReader(settings.stdinSettings.inputFile)
+ val writer = new OutputStreamWriter(process.getOutputStream)
+ val buf = new Array[Char](4096)
+ var readCount = 0
+ while ({readCount = reader.read(buf); readCount} >= 0)
+ writer.write(buf, 0, readCount)
+ writer.flush()
+ reader.close()
+ }
+
+ try {
+ process.getOutputStream.close()
+ process.waitFor()
+ } finally {
+ while (stdout == null || stderr == null) {
+ fromCapture.synchronized {
+ fromCapture.remove(ProcessController.STDOUT_KEY) match {
+ case Some(stream) => stdout = stream
+ case None => /* ignore */
+ }
+ fromCapture.remove(ProcessController.STDERR_KEY) match {
+ case Some(stream) => stderr = stream
+ case None => /* ignore */
+ }
+
+ try {
+ if (stdout == null || stderr == null)
+ fromCapture.wait()
+ } catch {
+ case e: InterruptedException =>
+ logger.error(e)
+ }
+ }
+ }
+ }
+ } finally {
+ ProcessController.running.remove(process)
+ }
+
+ new ProcessController.ProcessOutput(process.exitValue, stdout, stderr)
+ }
+
+ /** Ensures that the threads used to manipulate the IO for the process are cleaned up properly. */
+ def close() = {
+ try {
+ stdoutCapture.interrupt()
+ stderrCapture.interrupt()
+ } catch {
+ case e =>
+ logger.error(e)
+ }
+ }
+
+ /** calls close() */
+ override def finalize = close()
+
+ /**
+ * Reads in the output of a stream on a background thread to keep the output pipe from backing up and freezing the called process.
+ * @param key The stdout or stderr key for this output capture.
+ */
+ private class OutputCapture(private val key: String)
+ extends Thread("OutputCapture-" + key + "-" + Thread.currentThread.getName) {
+
+ setDaemon(true)
+
+ /** Runs the capture. */
+ override def run = {
+ var break = false
+ while (!break) {
+ var processStream: ProcessController.StreamOutput = ProcessController.EmptyStreamOutput
+ try {
+ // Wait for a new input stream to be passed from this process controller.
+ var capturedProcessStream: ProcessController.CapturedStreamOutput = null
+ while (capturedProcessStream == null) {
+ toCapture.synchronized {
+ toCapture.remove(key) match {
+ case Some(stream) => capturedProcessStream = stream
+ case None => toCapture.wait()
+ }
+ }
+ }
+ // Read in the input stream
+ processStream = capturedProcessStream
+ capturedProcessStream.read
+ } catch {
+ case e: InterruptedException => {
+ logger.info("OutputReader interrupted, exiting")
+ break = true
+ }
+ case e: IOException => {
+ logger.error("Error reading process output", e)
+ }
+ } finally {
+ // Send the string back to the process controller.
+ fromCapture.synchronized {
+ fromCapture.put(key, processStream)
+ fromCapture.notify()
+ }
+ }
+ }
+ }
+ }
+}
+
+/**
+ * Facade to Runtime.exec() and java.lang.Process. Handles
+ * running a process to completion and returns stdout and stderr
+ * as strings. Creates separate threads for reading stdout and stderr,
+ * then reuses those threads for each process; the most efficient use is
+ * to create one of these and use it repeatedly. Instances are not
+ * thread-safe, however.
+ *
+ * @author originally by Michael Koehrsen ported to scala and enhanced by Khalid Shakir
+ */
+object ProcessController extends Logging {
+
+ /**
+ * Settings that define how to run a process.
+ * @param cmdarray Command line to run.
+ * @param environment Environment settings to override System.getEnv, or null to use System.getEnv.
+ * @param directory The directory to run the command in, or null to run in the current directory.
+ * @param stdinSettings Settings for writing to the process stdin.
+ * @param stdoutSettings Settings for capturing the process stdout.
+   * @param stderrSettings Settings for capturing the process stderr.
+ * @param redirectErrorStream true if stderr should be sent to stdout.
+ */
+ class ProcessSettings(val cmdarray: Array[String], val environment: Map[String, String], val directory: File,
+ val stdinSettings: InputStreamSettings, val stdoutSettings: OutputStreamSettings,
+ val stderrSettings: OutputStreamSettings, val redirectErrorStream: Boolean)
+
+ /**
+ * Settings that define text to write to the process stdin.
+ * @param input String to write to stdin.
+ * @param inputFile File to write to stdin.
+ */
+ class InputStreamSettings(val input: String, val inputFile: File)
+
+ /**
+ * Settings that define text to capture from a process stream.
+ * @param stringSize The number of characters to capture, or -1 for unlimited.
+ * @param outputFile The file to write output to, or null to skip output.
+ * @param outputFileAppend true if the output file should be appended to.
+ */
+ class OutputStreamSettings(val stringSize: Int, val outputFile: File, val outputFileAppend: Boolean)
+
+ /**
+ * The output of a process.
+ * @param exitValue The exit value.
+ * @param stdout The capture of stdout as defined by the stdout OutputStreamSettings.
+ * @param stderr The capture of stderr as defined by the stderr OutputStreamSettings.
+ */
+ class ProcessOutput(val exitValue: Int, val stdout: StreamOutput, val stderr: StreamOutput)
+
+ /**
+ * The base class of stream output.
+ */
+ abstract class StreamOutput {
+ /**
+ * Returns the content as a string.
+ * @return The content as a string.
+ */
+ def content: String
+
+ /**
+ * Returns true if the content was truncated.
+ * @return true if the content was truncated.
+ */
+ def contentTruncated: Boolean
+ }
+
+ private var currentCaptureId = 0
+ /**
+ * Returns the next output capture id.
+ * @return The next output capture id.
+ */
+ private def NEXT_OUTPUT_CAPTURE_ID = {
+ currentCaptureId += 1
+ currentCaptureId
+ }
+ private val STDOUT_KEY = "stdout"
+ private val STDERR_KEY = "stderr"
+
+ /** Tracks running processes so that they can be killed as the JVM shuts down. */
+ private val running = new HashSet[Process]()
+ Runtime.getRuntime.addShutdownHook(new Thread {
+ /** Kills running processes as the JVM shuts down. */
+ override def run = for (process <- running.clone) {
+ logger.warn("Killing: " + process)
+ process.destroy
+ }
+ })
+
+ /** Empty stream settings used when no output is requested. */
+ private object EmptyStreamSettings extends OutputStreamSettings(0, null, false)
+
+ /** Empty stream output when no output is captured due to an error. */
+ private object EmptyStreamOutput extends StreamOutput {
+ def content = ""
+ def contentTruncated = false
+ }
+
+ /**
+ * Stream output captured from a stream.
+ * @param stream Stream to capture output.
+ * @param settings Settings that define what to capture.
+ */
+ private class CapturedStreamOutput(val stream: InputStream, val settings: OutputStreamSettings) extends StreamOutput {
+ /**
+ * Returns the captured content as a string.
+ * @return The captured content as a string.
+ */
+ def content = stringWriter.toString()
+
+ /**
+ * Returns true if the captured content was truncated.
+ * @return true if the captured content was truncated.
+ */
+ def contentTruncated = stringTruncated
+
+ /**
+ * Drain the input stream to keep the process from backing up until it's empty.
+ */
+ def read() = {
+ val reader = new InputStreamReader(stream)
+ val buf = new Array[Char](4096)
+ var readCount = 0
+ while ({readCount = reader.read(buf); readCount} >= 0) {
+ writeString(buf, readCount)
+ writeFile(buf, readCount)
+ }
+ closeFile()
+ stream.close()
+ }
+
+ /** The string to write capture content. */
+ private lazy val stringWriter = if (settings.stringSize < 0) new StringWriter else new StringWriter(settings.stringSize)
+
+ /** True if the content is truncated. */
+ private var stringTruncated = false
+
+ /** The number of characters left until the buffer is full. */
+ private var stringRemaining = settings.stringSize
+
+ /**
+ * Writes the buffer to the stringWriter up to stringRemaining characters.
+ * @param chars Character buffer to write.
+ * @param len Number of characters in the buffer.
+ */
+ private def writeString(chars: Array[Char], len: Int) = {
+ if (settings.stringSize < 0) {
+ stringWriter.write(chars, 0, len)
+ } else {
+ if (!stringTruncated) {
+ stringWriter.write(chars, 0, if (len > stringRemaining) stringRemaining else len)
+ stringRemaining -= len
+ if (stringRemaining < 0)
+ stringTruncated = true
+ }
+ }
+ }
+
+ /** The file writer to capture content or null if no output file was requested. */
+ private lazy val fileWriter = {
+ if (settings.outputFile == null) {
+ null
+ } else {
+ new FileWriter(settings.outputFile, settings.outputFileAppend)
+ }
+ }
+
+ /**
+ * Writes the buffer to the fileWriter if it is not null.
+ * @param chars Character buffer to write.
+ * @param len Number of characters in the buffer.
+ */
+ private def writeFile(chars: Array[Char], len: Int) = {
+ if (fileWriter != null)
+ fileWriter.write(chars, 0, len)
+ }
+
+ /** Closes the fileWriter if it is not null. */
+ private def closeFile() = {
+ if (fileWriter != null) {
+ fileWriter.flush
+ fileWriter.close
+ }
+ }
+ }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala
deleted file mode 100755
index f79a4f33d..000000000
--- a/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package org.broadinstitute.sting.queue.util
-
-import org.broadinstitute.sting.utils.text.XReadLines
-import collection.mutable.ListBuffer
-import collection.JavaConversions._
-import java.io.File
-
-object ProcessUtils extends Logging {
-
- Runtime.getRuntime.addShutdownHook(new Thread {
- override def run = for (process <- running.clone) {
- logger.warn("Killing: " + process)
- process.destroy
- }
- })
-
- val running = new ListBuffer[Process]()
-
- def runCommandAndWait(command: String, directory: File) = {
- logger.debug("Running command: " + command)
-
- var builder = new ProcessBuilder("sh", "-c", command).directory(directory)
-
- var process = builder.start
- running += process
- var result = process.waitFor
- running -= process
-
- if (logger.isDebugEnabled) {
- for (line <- new XReadLines(process.getInputStream).iterator) {
- logger.debug("command: " + line)
- }
-
- for (line <- new XReadLines(process.getErrorStream).iterator) {
- logger.error("command: " + line)
- }
- }
-
- logger.debug("Command exited with result: " + result)
-
- result
- }
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
index 566e3cc02..6f6ffdcc7 100644
--- a/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
@@ -2,67 +2,90 @@ package org.broadinstitute.sting.queue.util
import org.broadinstitute.sting.queue.QException
import java.lang.annotation.Annotation
-import scala.concurrent.JavaConversions._
import java.lang.reflect.{ParameterizedType, Field}
import org.broadinstitute.sting.commandline.ClassType
+import org.broadinstitute.sting.utils.classloader.JVMUtils
+/**
+ * A collection of scala extensions to the Sting JVMUtils.
+ */
object ReflectionUtils {
+
+ /**
+ * Returns true if field has the annotation.
+ * @param field Field to check.
+ * @param annotation Class of the annotation to look for.
+ * @return true if field has the annotation.
+ */
def hasAnnotation(field: Field, annotation: Class[_ <: Annotation]) = field.getAnnotation(annotation) != null
+ /**
+ * Gets the annotation or throws an exception if the annotation is not found.
+ * @param field Field to check.
+ * @param annotation Class of the annotation to look for.
+ * @return The annotation.
+ */
def getAnnotation[T <: Annotation](field: Field, annotation: Class[T]): T = {
if (!hasAnnotation(field, annotation))
throw new QException("Field %s is missing annotation %s".format(field, annotation))
field.getAnnotation(annotation).asInstanceOf[T]
}
-
+
+ /**
+ * Returns all the declared fields on a class in order of sub type to super type.
+ * @param clazz Base class to start looking for fields.
+ * @return List[Field] found on the class and all super classes.
+ */
def getAllFields(clazz: Class[_]) = getAllTypes(clazz).map(_.getDeclaredFields).flatMap(_.toList)
- def filterFields(fields: List[Field], annotation: Class[_ <: Annotation]) = fields.filter(field => hasAnnotation(field, annotation))
-
- def getFieldValues(obj: AnyRef, fields: List[Field]) = fields.map(field => fieldGetter(field).invoke(obj))
-
+ /**
+ * Gets all the types on a class in order of sub type to super type.
+ * @param clazz Base class.
+ * @return List[Class] including the class and all super classes.
+ */
def getAllTypes(clazz: Class[_]) = {
var types = List.empty[Class[_]]
- var c = clazz
- while (c != null) {
- types :+= c
- c = c.getSuperclass
- }
+ var c = clazz
+ while (c != null) {
+ types :+= c
+ c = c.getSuperclass
+ }
types
}
- def getValue(obj: AnyRef, field: Field) = fieldGetter(field).invoke(obj)
- def setValue(obj: AnyRef, field: Field, value: Any) = fieldSetter(field).invoke(obj, value.asInstanceOf[AnyRef])
-
- def addOrUpdateWithStringValue(obj: AnyRef, field: Field, value: String) = {
- val getter = fieldGetter(field)
- val setter = fieldSetter(field)
-
- if (classOf[Seq[_]].isAssignableFrom(field.getType)) {
-
- val fieldType = getCollectionType(field)
- val typeValue = coerce(fieldType, value)
-
- var list = getter.invoke(obj).asInstanceOf[Seq[_]]
- list :+= typeValue
- setter.invoke(obj, list)
-
- } else if (classOf[Option[_]].isAssignableFrom(field.getType)) {
-
- val fieldType = getCollectionType(field)
- val typeValue = coerce(fieldType, value)
-
- setter.invoke(obj, Some(typeValue))
-
- } else {
-
- val fieldType = field.getType
- val typeValue = coerce(fieldType, value)
-
- setter.invoke(obj, typeValue.asInstanceOf[AnyRef])
+ /**
+ * Gets a field value using reflection.
+ * Attempts to use the scala getter then falls back to directly accessing the field.
+ * @param obj Object to inspect.
+ * @param field Field to retrieve.
+ * @return The field value.
+ */
+ def getValue(obj: AnyRef, field: Field): AnyRef =
+ try {
+ field.getDeclaringClass.getMethod(field.getName).invoke(obj)
+ } catch {
+ case e: NoSuchMethodException => JVMUtils.getFieldValue(field, obj)
}
- }
+ /**
+ * Sets a field value using reflection.
+ * Attempts to use the scala setter then falls back to directly accessing the field.
+ * @param obj Object to inspect.
+ * @param field Field to set.
+ * @param value The new field value.
+ */
+ def setValue(obj: AnyRef, field: Field, value: Any) =
+ try {
+ field.getDeclaringClass.getMethod(field.getName+"_$eq", field.getType).invoke(obj, value.asInstanceOf[AnyRef])
+ } catch {
+ case e: NoSuchMethodException => JVMUtils.setFieldValue(field, obj, value)
+ }
+
+ /**
+ * Returns the collection type of a field or throws an exception if the field contains more than one parameterized type, or the collection type cannot be found.
+ * @param field Field to retrieve the collection type.
+ * @return The collection type for the field.
+ */
def getCollectionType(field: Field) = {
getGenericTypes(field) match {
case Some(classes) =>
@@ -70,10 +93,15 @@ object ReflectionUtils {
throw new IllegalArgumentException("Field contains more than one generic type: " + field)
classes(0)
case None =>
- throw new QException("Generic type not set for collection: " + field)
+ throw new QException("Generic type not set for collection. Did it declare an @ClassType?: " + field)
}
}
+ /**
+ * Returns the generic types for a field or None.
+ * @param field Field to retrieve the collection type.
+ * @return The array of classes that are in the collection type, or None if the type cannot be found.
+ */
private def getGenericTypes(field: Field): Option[Array[Class[_]]] = {
// TODO: Refactor: based on java code in org.broadinstitute.sting.commandline.ArgumentTypeDescriptor
// If this is a parameterized collection, find the contained type. If blow up if only one type exists.
@@ -85,39 +113,4 @@ object ReflectionUtils {
}
else None
}
-
- private def fieldGetter(field: Field) =
- try {
- field.getDeclaringClass.getMethod(field.getName)
- } catch {
- case e: NoSuchMethodException => throw new QException("Field may be private? Unable to find getter for field: " + field)
- }
-
- private def fieldSetter(field: Field) =
- try {
- field.getDeclaringClass.getMethod(field.getName+"_$eq", field.getType)
- } catch {
- case e: NoSuchMethodException => throw new QException("Field may be a val instead of var? Unable to find setter for field: " + field)
- }
-
- private def coerce(clazz: Class[_], value: String) = {
- if (classOf[String] == clazz) value
- else if (classOf[Boolean] == clazz) value.toBoolean
- else if (classOf[Byte] == clazz) value.toByte
- else if (classOf[Short] == clazz) value.toShort
- else if (classOf[Int] == clazz) value.toInt
- else if (classOf[Long] == clazz) value.toLong
- else if (classOf[Float] == clazz) value.toFloat
- else if (classOf[Double] == clazz) value.toDouble
- else if (hasStringConstructor(clazz))
- clazz.getConstructor(classOf[String]).newInstance(value)
- else throw new QException("Unable to coerce value '%s' to type '%s'.".format(value, clazz))
- }
-
- private def hasStringConstructor(clazz: Class[_]) = {
- clazz.getConstructors.exists(constructor => {
- val parameters = constructor.getParameterTypes
- parameters.size == 1 && parameters.head == classOf[String]
- })
- }
}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala
new file mode 100644
index 000000000..f2c84649c
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala
@@ -0,0 +1,71 @@
+package org.broadinstitute.sting.queue.util
+
+import collection.JavaConversions._
+import org.broadinstitute.sting.queue.QException
+import java.lang.Class
+import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor}
+
+/**
+ * An ArgumentTypeDescriptor that can parse the scala collections.
+ */
+class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
+
+ /**
+ * Checks if the class type is a scala collection.
+ * @param classType Class type to check.
+ * @return true if the class is a List, Set, or an Option.
+ */
+ def supports(classType: Class[_]) = isCompound(classType)
+
+ /**
+ * Checks if the class type is a scala collection.
+ * @param source Argument source to check.
+ * @return true if the source is a List, Set, or an Option.
+ */
+ override def isMultiValued(source: ArgumentSource) = isCompound(source.field.getType)
+
+ /**
+ * Checks if the class type is a scala collection.
+ * @param classType Class type to check.
+ * @return true if the class is a List, Set, or an Option.
+ */
+ private def isCompound(classType: Class[_]) = {
+ classOf[List[_]].isAssignableFrom(classType) ||
+ classOf[Set[_]].isAssignableFrom(classType) ||
+ classOf[Option[_]].isAssignableFrom(classType)
+ }
+
+ /**
+ * Parses the argument matches based on the class type of the argument source's field.
+ * @param source Argument source that contains the field being populated.
+ * @param classType Class type being parsed.
+ * @param argumentMatches The argument match strings that were found for this argument source.
+ * @return The parsed object.
+ */
+ def parse(source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
+ val componentType = ReflectionUtils.getCollectionType(source.field)
+ val componentArgumentParser = ArgumentTypeDescriptor.create(componentType)
+
+ if (classOf[List[_]].isAssignableFrom(classType)) {
+ var list = List.empty[Any]
+ for (argumentMatch <- argumentMatches)
+ for (value <- argumentMatch)
+ list :+= componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
+ list
+ } else if (classOf[Set[_]].isAssignableFrom(classType)) {
+ var set = Set.empty[Any]
+ for (argumentMatch <- argumentMatches)
+ for (value <- argumentMatch)
+ set += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
+ set
+ } else if (classOf[Option[_]].isAssignableFrom(classType)) {
+ if (argumentMatches.size > 1)
+ throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" "))
+ else if (argumentMatches.size == 1)
+ Some(componentArgumentParser.parse(source, componentType, argumentMatches))
+ else
+ None
+ } else
+ throw new QException("Unsupported compound argument type: " + classType)
+ }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala
new file mode 100755
index 000000000..e4f8f2899
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala
@@ -0,0 +1,37 @@
+package org.broadinstitute.sting.queue.util
+
+import org.broadinstitute.sting.queue.QException
+
+/**
+ * Runs a job on the command line by invoking "sh -c "
+ */
+class ShellJob extends CommandLineJob with Logging {
+ /**
+ * Runs the command and waits for the output.
+ */
+ def run() = {
+ assert(command != null, "Command was not set on job")
+
+ val (redirectError, errorFile) = if (this.errorFile == null) (true, null) else (false, this.errorFile)
+ val bufferSize = if (logger.isDebugEnabled) FIVE_MB else 0
+ val stdinSettings = new ProcessController.InputStreamSettings(null, this.inputFile)
+ val stdoutSettings = new ProcessController.OutputStreamSettings(bufferSize, this.outputFile, true)
+ val stderrSettings = new ProcessController.OutputStreamSettings(FIVE_MB, errorFile, true)
+ val processSettings = new ProcessController.ProcessSettings(
+ Array("sh", "-c", command), null, this.workingDir, stdinSettings, stdoutSettings, stderrSettings, redirectError)
+
+ val output = processController.exec(processSettings)
+
+ if (logger.isDebugEnabled) {
+ logger.debug("output: " + content(output.stdout))
+ logger.debug("error: " + content(output.stderr))
+ logger.debug("Command exited with result: " + output.exitValue)
+ }
+
+ if (output.exitValue != 0) {
+ logger.error("Failed to run job, got exit code %s. Standard error contained: %n%s"
+ .format(output.exitValue, content(output.stderr)))
+ throw new QException("Failed to run job, got exit code %s.".format(output.exitValue))
+ }
+ }
+}
diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml
index 9a2acdd28..e5f39d0f2 100644
--- a/settings/ivysettings.xml
+++ b/settings/ivysettings.xml
@@ -6,7 +6,9 @@
-
+
+
+
@@ -15,5 +17,8 @@
+
+
+
diff --git a/settings/repository/edu.mit.broad/broad-core-all-2.8.jar b/settings/repository/edu.mit.broad/broad-core-all-2.8.jar
deleted file mode 100644
index 715288886..000000000
Binary files a/settings/repository/edu.mit.broad/broad-core-all-2.8.jar and /dev/null differ
diff --git a/settings/repository/edu.mit.broad/broad-core-all-2.8.xml b/settings/repository/edu.mit.broad/broad-core-all-2.8.xml
deleted file mode 100644
index 7e7b31e80..000000000
--- a/settings/repository/edu.mit.broad/broad-core-all-2.8.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
diff --git a/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml b/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml
index 65899298f..75fd688fb 100644
--- a/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml
+++ b/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml
@@ -1,3 +1,12 @@
+
+
+
+
+
+
+
+
+