diff --git a/build.xml b/build.xml index dda03c288..7f69a3a12 100644 --- a/build.xml +++ b/build.xml @@ -7,7 +7,14 @@ + + + + + + + @@ -24,9 +31,7 @@ - - @@ -55,21 +60,28 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + + - + - + + + + + + + + + + + + + - + + + + + + + Building Queue... + + + + + + + + + + + + + + + + Generating Queue GATK extensions... + + + + + Building Queue GATK extensions... + + + + + + + + + + + additionalparam="-build-timestamp "${build.timestamp}" -version-suffix .${build.version} -out ${basedir}/${resource.path}"> - + + + @@ -141,14 +249,20 @@ + + + + + @@ -193,12 +307,46 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -206,12 +354,6 @@ - - - - - - @@ -232,29 +374,57 @@ + + + + + + + + + + + + + + + + + + + + - + - + - + + + + + + + + + @@ -300,46 +470,6 @@ - - - - - - - - - - - - - - - - - - - - Building Queue... 
- - - - - - - - - - - - - - - - - - - - diff --git a/ivy.xml b/ivy.xml index 3c9e6a4b0..cebf26a86 100644 --- a/ivy.xml +++ b/ivy.xml @@ -8,46 +8,40 @@ - - - - - - - - - - + + + + + + + + + + - - - - - - - - - + - + - + - - - - - + + + + + + + + - - + + - - + @@ -56,5 +50,8 @@ + + + diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 143c3e6d3..91cbf5f8c 100755 --- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.analyzecovariates; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.walkers.recalibration.*; import org.broadinstitute.sting.utils.classloader.PackageUtils; import org.broadinstitute.sting.utils.text.XReadLines; @@ -51,7 +52,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { // Command Line Arguments ///////////////////////////// - @Argument(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) + @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) private String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) private String OUTPUT_DIR = "analyzeCovariates/"; diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java b/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java index f206aac58..000d540fc 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java @@ -40,6 +40,11 @@ public class ArgumentDefinition { */ public final 
ArgumentIOType ioType; + /** + * The class of the argument. + */ + public final Class argumentType; + /** * Full name of the argument. Must have a value. */ @@ -70,6 +75,11 @@ public class ArgumentDefinition { */ public final boolean isMultiValued; + /** + * The class of the componentType. Not used for scalars. + */ + public final Class componentType; + /** * Is this argument hidden from the help system? */ @@ -93,35 +103,41 @@ public class ArgumentDefinition { /** * Creates a new argument definition. * @param ioType Whether the argument is an input or an output. + * @param argumentType The class of the field. * @param fullName Full name for this argument definition. * @param shortName Short name for this argument definition. * @param doc Doc string for this argument. * @param required Whether or not this argument is required. * @param isFlag Whether or not this argument should be treated as a flag. * @param isMultiValued Whether or not this argument supports multiple values. + * @param componentType For multivalued arguments the type of the components. * @param isHidden Whether or not this argument should be hidden from the command-line argument system. * @param exclusiveOf Whether this command line argument is mutually exclusive of other arguments. * @param validation A regular expression for command-line argument validation. * @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null. 
*/ public ArgumentDefinition( ArgumentIOType ioType, + Class argumentType, String fullName, String shortName, String doc, boolean required, boolean isFlag, boolean isMultiValued, + Class componentType, boolean isHidden, String exclusiveOf, String validation, List validOptions) { this.ioType = ioType; + this.argumentType = argumentType; this.fullName = fullName; this.shortName = shortName; this.doc = doc; this.required = required; this.isFlag = isFlag; this.isMultiValued = isMultiValued; + this.componentType = componentType; this.isHidden = isHidden; this.exclusiveOf = exclusiveOf; this.validation = validation; @@ -131,18 +147,22 @@ public class ArgumentDefinition { /** * Creates a new argument definition. * @param annotation The annotation on the field. + * @param argumentType The class of the field. * @param defaultFullName Default full name for this argument definition. * @param defaultShortName Default short name for this argument definition. * @param isFlag Whether or not this argument should be treated as a flag. * @param isMultiValued Whether or not this argument supports multiple values. + * @param componentType For multivalued arguments the type of the components. * @param isHidden Whether or not this argument should be hidden from the command-line argument system. * @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null. 
*/ public ArgumentDefinition( Annotation annotation, + Class argumentType, String defaultFullName, String defaultShortName, boolean isFlag, boolean isMultiValued, + Class componentType, boolean isHidden, List validOptions) { @@ -162,13 +182,15 @@ public class ArgumentDefinition { else shortName = null; - this.ioType = getIOType(annotation); + this.ioType = ArgumentIOType.getIOType(annotation); + this.argumentType = argumentType; this.fullName = fullName; this.shortName = shortName; this.doc = getDoc(annotation); this.required = isRequired(annotation, isFlag); this.isFlag = isFlag; this.isMultiValued = isMultiValued; + this.componentType = componentType; this.isHidden = isHidden; this.exclusiveOf = getExclusiveOf(annotation); this.validation = getValidationRegex(annotation); @@ -178,25 +200,31 @@ public class ArgumentDefinition { /** * Creates a new argument definition. * @param annotation The annotation on the field. + * @param argumentType The class of the field. * @param fieldName Default full name for this argument definition. * @param isFlag Whether or not this argument should be treated as a flag. * @param isMultiValued Whether or not this argument supports multiple values. + * @param componentType For multivalued arguments the type of the components. * @param isHidden Whether or not this argument should be hidden from the command-line argument system. * @param validOptions is there a particular list of options that's valid for this argument definition? List them if so, otherwise set this to null. 
*/ public ArgumentDefinition( Annotation annotation, + Class argumentType, String fieldName, boolean isFlag, boolean isMultiValued, + Class componentType, boolean isHidden, List validOptions) { - this.ioType = getIOType(annotation); + this.ioType = ArgumentIOType.getIOType(annotation); + this.argumentType = argumentType; this.fullName = getFullName(annotation, fieldName); this.shortName = getShortName(annotation); this.doc = getDoc(annotation); this.required = isRequired(annotation, isFlag); this.isFlag = isFlag; this.isMultiValued = isMultiValued; + this.componentType = componentType; this.isHidden = isHidden; this.exclusiveOf = getExclusiveOf(annotation); this.validation = getValidationRegex(annotation); @@ -222,17 +250,6 @@ public class ArgumentDefinition { Utils.equals(shortName,other.shortName); } - /** - * Returns the ArgumentIOType for the annotation. - * @param annotation @Input or @Output - * @return ArgumentIOType.Input, Output, or Unknown - */ - public static ArgumentIOType getIOType(Annotation annotation) { - if (annotation instanceof Input) return ArgumentIOType.INPUT; - if (annotation instanceof Output) return ArgumentIOType.OUTPUT; - return ArgumentIOType.UNKNOWN; - } - /** * A hack to get around the fact that Java doesn't like inheritance in Annotations. 
* @param annotation to run the method on diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java b/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java index af516004a..03e3066fb 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentIOType.java @@ -24,6 +24,28 @@ package org.broadinstitute.sting.commandline; +import org.broadinstitute.sting.utils.StingException; + +import java.lang.annotation.Annotation; + public enum ArgumentIOType { - INPUT, OUTPUT, UNKNOWN + INPUT(Input.class), OUTPUT(Output.class), ARGUMENT(Argument.class); + + public final Class annotationClass; + + ArgumentIOType(Class annotationClass) { + this.annotationClass = annotationClass; + } + + /** + * Returns the ArgumentIOType for the annotation. + * @param annotation @Input or @Output + * @return ArgumentIOType.Input, Output, or Unknown + */ + public static ArgumentIOType getIOType(Annotation annotation) { + for (ArgumentIOType ioType: ArgumentIOType.values()) + if (ioType.annotationClass.isAssignableFrom(annotation.getClass())) + return ioType; + throw new StingException("Unknown annotation type: " + annotation); + } } diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java new file mode 100755 index 000000000..56bedc012 --- /dev/null +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, 
subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import java.util.*; + +/** + * A mapping of all the sites where an argument definition maps to a site on the command line. + */ +public class ArgumentMatch implements Iterable { + /** + * The argument definition that's been matched. + */ + public final ArgumentDefinition definition; + + /** + * The text that's been matched, as it appears in the command line arguments. + */ + public final String label; + + /** + * Maps indicies of command line arguments to values paired with that argument. + */ + public final SortedMap> indices = new TreeMap>(); + + /** + * Create a new argument match, defining its properties later. Used to create invalid arguments. + */ + public ArgumentMatch() { + this.label = null; + this.definition = null; + } + + /** + * A simple way of indicating that an argument with the given label and definition exists at this index. + * @param label Label of the argument match. Must not be null. + * @param definition The associated definition, if one exists. May be null. + * @param index Position of the argument. Must not be null. 
+ */ + public ArgumentMatch( String label, ArgumentDefinition definition, int index ) { + this( label, definition, index, null ); + } + + private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) { + this.label = label; + this.definition = definition; + + ArrayList values = new ArrayList(); + if( value != null ) + values.add(value); + indices.put(index,values ); + } + + /** + * Return a string representation of the given argument match, for debugging purposes. + * @return String representation of the match. + */ + public String toString() { + return label; + } + + /** + * Creates an iterator that walks over each individual match at each position of a given argument. + * @return An iterator over the individual matches in this argument. Will not be null. + */ + public Iterator iterator() { + return new Iterator() { + /** + * Iterate over each the available index. + */ + private Iterator indexIterator = null; + + /** + * Iterate over each available token. + */ + private Iterator tokenIterator = null; + + /** + * The next index to return. Null if none remain. + */ + Integer nextIndex = null; + + /** + * The next token to return. Null if none remain. + */ + String nextToken = null; + + { + indexIterator = indices.keySet().iterator(); + prepareNext(); + } + + /** + * Is there a nextToken available to return? + * @return True if there's another token waiting in the wings. False otherwise. + */ + public boolean hasNext() { + return nextToken != null; + } + + /** + * Get the next token, if one exists. If not, throw an IllegalStateException. + * @return The next ArgumentMatch in the series. Should never be null. 
+ */ + public ArgumentMatch next() { + if( nextIndex == null || nextToken == null ) + throw new IllegalStateException( "No more ArgumentMatches are available" ); + + ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken ); + prepareNext(); + return match; + } + + /** + * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, + * initialize nextIndex / nextToken to null. + */ + private void prepareNext() { + if( tokenIterator != null && tokenIterator.hasNext() ) { + nextToken = tokenIterator.next(); + } + else { + nextIndex = null; + nextToken = null; + + // Do a nested loop. While more data is present in the inner loop, grab that data. + // Otherwise, troll the outer iterator looking for more data. + while( indexIterator.hasNext() ) { + nextIndex = indexIterator.next(); + if( indices.get(nextIndex) != null ) { + tokenIterator = indices.get(nextIndex).iterator(); + if( tokenIterator.hasNext() ) { + nextToken = tokenIterator.next(); + break; + } + } + } + } + + } + + /** + * Remove is unsupported in this context. + */ + public void remove() { + throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating."); + } + }; + } + + /** + * Merge two ArgumentMatches, so that the values for all arguments go into the + * same data structure. + * @param other The other match to merge into. + */ + public void mergeInto( ArgumentMatch other ) { + indices.putAll(other.indices); + } + + /** + * Associate a value with this merge maapping. + * @param index index of the command-line argument to which this value is mated. + * @param value Text representation of value to add. + */ + public void addValue( int index, String value ) { + if( !indices.containsKey(index) || indices.get(index) == null ) + indices.put(index, new ArrayList() ); + indices.get(index).add(value); + } + + /** + * Does this argument already have a value at the given site? 
+ * Arguments are only allowed to be single-valued per site, and + * flags aren't allowed a value at all. + * @param index Index at which to check for values. + * @return True if the argument has a value at the given site. False otherwise. + */ + public boolean hasValueAtSite( int index ) { + return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); + } + + /** + * Return the values associated with this argument match. + * @return A collection of the string representation of these value. + */ + public List values() { + List values = new ArrayList(); + for( int index: indices.keySet() ) { + if( indices.get(index) != null ) + values.addAll(indices.get(index)); + } + return values; + } + + /** + * Convenience method returning true if the definition is a flag. + * @return True if definition is known to be a flag; false if not known to be a flag. + */ + private boolean isArgumentFlag() { + return definition != null && definition.isFlag; + } +} diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java index 3ee544c5f..03978adac 100755 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java @@ -192,200 +192,3 @@ public class ArgumentMatches implements Iterable { return new HashSet( argumentMatches.values() ); } } - -/** - * A mapping of all the sites where an argument definition maps to a site on the command line. - */ -class ArgumentMatch implements Iterable { - /** - * The argument definition that's been matched. - */ - public final ArgumentDefinition definition; - - /** - * The text that's been matched, as it appears in the command line arguments. - */ - public final String label; - - /** - * Maps indicies of command line arguments to values paired with that argument. 
- */ - public final SortedMap> indices = new TreeMap>(); - - /** - * Create a new argument match, defining its properties later. Used to create invalid arguments. - */ - public ArgumentMatch() { - this.label = null; - this.definition = null; - } - - /** - * A simple way of indicating that an argument with the given label and definition exists at this index. - * @param label Label of the argument match. Must not be null. - * @param definition The associated definition, if one exists. May be null. - * @param index Position of the argument. Must not be null. - */ - public ArgumentMatch( String label, ArgumentDefinition definition, int index ) { - this( label, definition, index, null ); - } - - private ArgumentMatch( String label, ArgumentDefinition definition, int index, String value ) { - this.label = label; - this.definition = definition; - - ArrayList values = new ArrayList(); - if( value != null ) - values.add(value); - indices.put(index,values ); - } - - /** - * Return a string representation of the given argument match, for debugging purposes. - * @return String representation of the match. - */ - public String toString() { - return label; - } - - /** - * Creates an iterator that walks over each individual match at each position of a given argument. - * @return An iterator over the individual matches in this argument. Will not be null. - */ - public Iterator iterator() { - return new Iterator() { - /** - * Iterate over each the available index. - */ - private Iterator indexIterator = null; - - /** - * Iterate over each available token. - */ - private Iterator tokenIterator = null; - - /** - * The next index to return. Null if none remain. - */ - Integer nextIndex = null; - - /** - * The next token to return. Null if none remain. - */ - String nextToken = null; - - { - indexIterator = indices.keySet().iterator(); - prepareNext(); - } - - /** - * Is there a nextToken available to return? - * @return True if there's another token waiting in the wings. 
False otherwise. - */ - public boolean hasNext() { - return nextToken != null; - } - - /** - * Get the next token, if one exists. If not, throw an IllegalStateException. - * @return The next ArgumentMatch in the series. Should never be null. - */ - public ArgumentMatch next() { - if( nextIndex == null || nextToken == null ) - throw new IllegalStateException( "No more ArgumentMatches are available" ); - - ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken ); - prepareNext(); - return match; - } - - /** - * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, - * initialize nextIndex / nextToken to null. - */ - private void prepareNext() { - if( tokenIterator != null && tokenIterator.hasNext() ) { - nextToken = tokenIterator.next(); - } - else { - nextIndex = null; - nextToken = null; - - // Do a nested loop. While more data is present in the inner loop, grab that data. - // Otherwise, troll the outer iterator looking for more data. - while( indexIterator.hasNext() ) { - nextIndex = indexIterator.next(); - if( indices.get(nextIndex) != null ) { - tokenIterator = indices.get(nextIndex).iterator(); - if( tokenIterator.hasNext() ) { - nextToken = tokenIterator.next(); - break; - } - } - } - } - - } - - /** - * Remove is unsupported in this context. - */ - public void remove() { - throw new UnsupportedOperationException("Cannot remove an argument match from the collection while iterating."); - } - }; - } - - /** - * Merge two ArgumentMatches, so that the values for all arguments go into the - * same data structure. - * @param other The other match to merge into. - */ - public void mergeInto( ArgumentMatch other ) { - indices.putAll(other.indices); - } - - /** - * Associate a value with this merge maapping. - * @param index index of the command-line argument to which this value is mated. - * @param value Text representation of value to add. 
- */ - public void addValue( int index, String value ) { - if( !indices.containsKey(index) || indices.get(index) == null ) - indices.put(index, new ArrayList() ); - indices.get(index).add(value); - } - - /** - * Does this argument already have a value at the given site? - * Arguments are only allowed to be single-valued per site, and - * flags aren't allowed a value at all. - * @param index Index at which to check for values. - * @return True if the argument has a value at the given site. False otherwise. - */ - public boolean hasValueAtSite( int index ) { - return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); - } - - /** - * Return the values associated with this argument match. - * @return A collection of the string representation of these value. - */ - public List values() { - List values = new ArrayList(); - for( int index: indices.keySet() ) { - if( indices.get(index) != null ) - values.addAll(indices.get(index)); - } - return values; - } - - /** - * Convenience method returning true if the definition is a flag. - * @return True if definition is known to be a flag; false if not known to be a flag. - */ - private boolean isArgumentFlag() { - return definition != null && definition.isFlag; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java index 182b1c8a3..635780aa5 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.gatk.walkers.Hidden; import java.lang.reflect.Field; -import java.util.Collection; +import java.util.Arrays; import java.util.List; /** @@ -41,9 +41,9 @@ import java.util.List; */ public class ArgumentSource { /** - * Class to which the field belongs. 
+ * Field into which to inject command-line arguments. */ - public final Class clazz; + public final Field[] parentFields; /** * Field into which to inject command-line arguments. @@ -57,11 +57,19 @@ public class ArgumentSource { /** * Create a new command-line argument target. - * @param clazz Class containing the argument. - * @param field Field containing the argument. Field must be annotated with 'Argument'. + * @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'. */ - public ArgumentSource( Class clazz, Field field ) { - this.clazz = clazz; + public ArgumentSource( Field field ) { + this(new Field[0], field); + } + + /** + * Create a new command-line argument target. + * @param parentFields Parent fields containing the the field. Field must be annotated with 'ArgumentCollection'. + * @param field Field containing the argument. Field must be annotated with 'Input' or 'Output'. + */ + public ArgumentSource( Field[] parentFields, Field field ) { + this.parentFields = parentFields; this.field = field; this.typeDescriptor = ArgumentTypeDescriptor.create( field.getType() ); } @@ -80,7 +88,7 @@ public class ArgumentSource { return false; ArgumentSource otherArgumentSource = (ArgumentSource)other; - return this.clazz.equals(otherArgumentSource.clazz) && this.field.equals(otherArgumentSource.field); + return this.field == otherArgumentSource.field && Arrays.equals(this.parentFields, otherArgumentSource.parentFields); } /** @@ -89,7 +97,7 @@ public class ArgumentSource { */ @Override public int hashCode() { - return clazz.hashCode() ^ field.hashCode(); + return field.hashCode(); } /** @@ -118,18 +126,11 @@ public class ArgumentSource { /** * Parses the specified value based on the specified type. - * @param source The type of value to be parsed. * @param values String representation of all values passed. * @return the parsed value of the object. 
*/ - public Object parse( ArgumentSource source, ArgumentMatches values ) { - Object value = null; - if( !isFlag() ) - value = typeDescriptor.parse( source, values ); - else - value = true; - - return value; + public Object parse( ArgumentMatches values ) { + return typeDescriptor.parse( this, values ); } /** @@ -145,8 +146,7 @@ public class ArgumentSource { * @return True if the argument supports multiple values. */ public boolean isMultiValued() { - Class argumentType = field.getType(); - return Collection.class.isAssignableFrom(argumentType) || field.getType().isArray(); + return typeDescriptor.isMultiValued( this ); } /** @@ -162,6 +162,6 @@ public class ArgumentSource { * @return String representation of the argument source. */ public String toString() { - return clazz.getSimpleName() + ": " + field.getName(); + return field.getDeclaringClass().getSimpleName() + ": " + field.getName(); } } diff --git a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 02a46b69d..4993ebfe5 100644 --- a/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -113,10 +113,26 @@ public abstract class ArgumentTypeDescriptor { return Collections.singletonList(createDefaultArgumentDefinition(source)); } + /** + * Parses an argument source to an object. + * @param source The source used to find the matches. + * @param matches The matches for the source. + * @return The parsed object. + */ public Object parse( ArgumentSource source, ArgumentMatches matches ) { return parse( source, source.field.getType(), matches ); } + /** + * Returns true if the field is a collection or an array. + * @param source The argument source to check. + * @return true if the field is a collection or an array. 
+ */ + public boolean isMultiValued( ArgumentSource source ) { + Class argumentType = source.field.getType(); + return Collection.class.isAssignableFrom(argumentType) || argumentType.isArray(); + } + /** * By default, argument sources create argument definitions with a set of default values. * Use this method to create the one simple argument definition. @@ -125,15 +141,41 @@ public abstract class ArgumentTypeDescriptor { */ protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) { return new ArgumentDefinition( getArgumentAnnotation(source), + source.field.getType(), source.field.getName(), source.isFlag(), source.isMultiValued(), + getCollectionComponentType(source.field), source.isHidden(), getValidOptions(source) ); } - public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches ); + /** + * Return the component type of a field, or String.class if the type cannot be found. + * @param field The reflected field to inspect. + * @return The parameterized component type, or String.class if the parameterized type could not be found. + * @throws IllegalArgumentException If more than one parameterized type is found on the field. + */ + protected Class getCollectionComponentType( Field field ) { + // If this is a parameterized collection, find the contained type. If blow up if more than one type exists. + if( field.getGenericType() instanceof ParameterizedType) { + ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); + if( parameterizedType.getActualTypeArguments().length > 1 ) + throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); + return (Class)parameterizedType.getActualTypeArguments()[0]; + } + else + return String.class; + } + /** + * Parses the argument matches for a class type into an object. + * @param source The original argument source used to find the matches. + * @param type The current class type being inspected. 
May not match the argument source.field.getType() if this as a collection for example. + * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. + * @return The individual parsed object matching the argument match with Class type. + */ + public abstract Object parse( ArgumentSource source, Class type, ArgumentMatches matches ); /** * If the argument source only accepts a small set of options, populate the returned list with @@ -193,6 +235,11 @@ public abstract class ArgumentTypeDescriptor { throw new StingException("ArgumentAnnotation is not present for the argument field: " + source.field.getName()); } + /** + * Returns true if an argument annotation is present + * @param field The field to check for an annotation. + * @return True if an argument annotation is present on the field. + */ @SuppressWarnings("unchecked") public static boolean isArgumentAnnotationPresent(Field field) { for (Class annotation: ARGUMENT_ANNOTATIONS) @@ -235,6 +282,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) { + if (source.isFlag()) + return true; String value = getArgumentValue( createDefaultArgumentDefinition(source), matches ); // lets go through the types we support @@ -301,7 +350,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { public boolean supports( Class type ) { return ( Collection.class.isAssignableFrom(type) || type.isArray() ); } - + @Override @SuppressWarnings("unchecked") public Object parse( ArgumentSource source, Class type, ArgumentMatches matches ) @@ -319,16 +368,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { else if( java.util.Set.class.isAssignableFrom(type) ) type = java.util.TreeSet.class; } - // If this is a parameterized collection, find the contained type. 
If blow up if only one type exists. - if( source.field.getGenericType() instanceof ParameterizedType) { - ParameterizedType parameterizedType = (ParameterizedType)source.field.getGenericType(); - if( parameterizedType.getActualTypeArguments().length > 1 ) - throw new IllegalArgumentException("Unable to determine collection type of field: " + source.field.toString()); - componentType = (Class)parameterizedType.getActualTypeArguments()[0]; - } - else - componentType = String.class; - + componentType = getCollectionComponentType( source.field ); ArgumentTypeDescriptor componentArgumentParser = ArgumentTypeDescriptor.create( componentType ); Collection collection; diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index bedca9043..89fd143e2 100644 --- a/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -43,11 +43,11 @@ public abstract class CommandLineProgram { private static Logger logger = Logger.getRootLogger(); /** the default log level */ - @Input(fullName = "logging_level", + @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging, i.e. 
setting INFO get's you INFO up to FATAL, setting ERROR gets you ERROR and FATAL level logging.", required = false) - protected String logging_level = "WARN"; + protected String logging_level = "INFO"; /** where to send the output of our logger */ @@ -58,21 +58,21 @@ public abstract class CommandLineProgram { protected String toFile = null; /** do we want to silence the command line output */ - @Input(fullName = "quiet_output_mode", + @Argument(fullName = "quiet_output_mode", shortName = "quiet", doc = "Set the logging to quiet mode, no output to stdout", required = false) protected Boolean quietMode = false; /** do we want to generate debugging information with the logs */ - @Input(fullName = "debug_mode", + @Argument(fullName = "debug_mode", shortName = "debug", doc = "Set the logging file string to include a lot of debugging information (SLOW!)", required = false) protected Boolean debugMode = false; /** this is used to indicate if they've asked for help */ - @Input(fullName = "help", shortName = "h", doc = "Generate this help message", required = false) + @Argument(fullName = "help", shortName = "h", doc = "Generate this help message", required = false) public Boolean help = false; /** our logging output patterns */ @@ -146,6 +146,7 @@ public abstract class CommandLineProgram { * @param clp the command line program to execute * @param args the command line arguments passed in */ + @SuppressWarnings("unchecked") public static void start(CommandLineProgram clp, String[] args) { try { @@ -174,14 +175,14 @@ public abstract class CommandLineProgram { parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parser.parse(args); - if (isHelpPresent(clp, parser)) + if (isHelpPresent(parser)) printHelpAndExit(clp, parser); parser.validate(); } else { parser.parse(args); - if (isHelpPresent(clp, parser)) + if (isHelpPresent(parser)) printHelpAndExit(clp, parser); parser.validate(); @@ -216,7 +217,7 @@ public abstract class CommandLineProgram { 
// if they specify a log location, output our data there if (clp.toFile != null) { - FileAppender appender = null; + FileAppender appender; try { appender = new FileAppender(layout, clp.toFile, false); logger.addAppender(appender); @@ -258,7 +259,7 @@ public abstract class CommandLineProgram { */ private static void toErrorLog(CommandLineProgram clp, Exception e) { File logFile = new File("GATK_Error.log"); - PrintStream stream = null; + PrintStream stream; try { stream = new PrintStream(logFile); } catch (Exception e1) { // catch all the exceptions here, if we can't create the file, do the alternate path @@ -279,22 +280,12 @@ public abstract class CommandLineProgram { parser.loadArgumentsIntoObject(obj); } - /** - * a manual way to load argument providing objects into the program - * - * @param clp the command line program - * @param cls the class to load the arguments off of - */ - public void loadAdditionalSource(CommandLineProgram clp, Class cls) { - parser.addArgumentSource(clp.getArgumentSourceName(cls), cls); - } - /** * this function checks the logger level passed in on the command line, taking the lowest * level that was provided. */ private void setupLoggerLevel() { - Level par = Level.WARN; + Level par; if (logging_level.toUpperCase().equals("DEBUG")) { par = Level.DEBUG; } else if (logging_level.toUpperCase().equals("ERROR")) { @@ -316,9 +307,9 @@ public abstract class CommandLineProgram { } /** - * a function used to indicate an error occured in the command line tool + * a function used to indicate an error occurred in the command line tool * - * @param msg + * @param msg message to display */ private static void printExitSystemMsg(final String msg) { System.out.printf("The following error has occurred:%n%n"); @@ -334,12 +325,11 @@ public abstract class CommandLineProgram { /** * Do a cursory search for the given argument. * - * @param clp Instance of the command-line program. * @param parser Parser * * @return True if help is present; false otherwise. 
*/ - private static boolean isHelpPresent(CommandLineProgram clp, ParsingEngine parser) { + private static boolean isHelpPresent(ParsingEngine parser) { return parser.isArgumentPresent("help"); } diff --git a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 2055faea9..d48123a4d 100755 --- a/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -270,26 +270,38 @@ public class ParsingEngine { return; // Target instance into which to inject the value. - List targets = new ArrayList(); - - // Check to see whether the instance itself can be the target. - if( source.clazz.isAssignableFrom(instance.getClass()) ) { - targets.add(instance); - } - - // Check to see whether a contained class can be the target. - targets.addAll(getContainersMatching(instance,source.clazz)); + Collection targets = findTargets( source, instance ); // Abort if no home is found for the object. if( targets.size() == 0 ) throw new StingException("Internal command-line parser error: unable to find a home for argument matches " + argumentMatches); for( Object target: targets ) { - Object value = (argumentMatches.size() != 0) ? source.parse(source,argumentMatches) : source.getDefault(); + Object value = (argumentMatches.size() != 0) ? source.parse(argumentMatches) : source.getDefault(); JVMUtils.setFieldValue(source.field,target,value); } } + /** + * Gets a collection of the container instances of the given type stored within the given target. + * @param source Argument source. + * @param instance Container. + * @return A collection of containers matching the given argument source. 
+ */ + private Collection findTargets(ArgumentSource source, Object instance) { + LinkedHashSet targets = new LinkedHashSet(); + for( Class clazz = instance.getClass(); clazz != null; clazz = clazz.getSuperclass() ) { + for( Field field: clazz.getDeclaredFields() ) { + if( field.equals(source.field) ) { + targets.add(instance); + } else if( field.isAnnotationPresent(ArgumentCollection.class) ) { + targets.addAll(findTargets(source, JVMUtils.getFieldValue(field, instance))); + } + } + } + return targets; + } + /** * Prints out the help associated with these command-line argument definitions. * @param applicationDetails Details about the specific GATK-based application being run. @@ -303,15 +315,22 @@ public class ParsingEngine { * @param sourceClass class to act as sources for other arguments. * @return A list of sources associated with this object and its aggregated objects. */ - protected static List extractArgumentSources(Class sourceClass) { + public static List extractArgumentSources(Class sourceClass) { + return extractArgumentSources(sourceClass, new Field[0]); + } + + private static List extractArgumentSources(Class sourceClass, Field[] parentFields) { List argumentSources = new ArrayList(); while( sourceClass != null ) { Field[] fields = sourceClass.getDeclaredFields(); for( Field field: fields ) { if( ArgumentTypeDescriptor.isArgumentAnnotationPresent(field) ) - argumentSources.add( new ArgumentSource(sourceClass,field) ); - if( field.isAnnotationPresent(ArgumentCollection.class) ) - argumentSources.addAll( extractArgumentSources(field.getType()) ); + argumentSources.add( new ArgumentSource(parentFields, field) ); + if( field.isAnnotationPresent(ArgumentCollection.class) ) { + Field[] newParentFields = Arrays.copyOf(parentFields, parentFields.length + 1); + newParentFields[parentFields.length] = field; + argumentSources.addAll( extractArgumentSources(field.getType(), newParentFields) ); + } } sourceClass = sourceClass.getSuperclass(); } @@ -350,24 +369,6 
@@ public class ParsingEngine { // No parse results found. return null; } - - /** - * Gets a list of the container instances of the given type stored within the given target. - * @param target Class holding the container. - * @param type Container type. - * @return A list of containers matching the given type. - */ - private List getContainersMatching(Object target, Class type) { - List containers = new ArrayList(); - - Field[] fields = target.getClass().getDeclaredFields(); - for( Field field: fields ) { - if( field.isAnnotationPresent(ArgumentCollection.class) && type.isAssignableFrom(field.getType()) ) - containers.add(JVMUtils.getFieldValue(field,target)); - } - - return containers; - } } /** diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index ce8b23d44..b13c26cb1 100755 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.gatk.GATKErrorReport; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.commandline.*; @@ -135,7 +134,7 @@ public class CommandLineGATK extends CommandLineExecutable { * @return A string summarizing the walkers available in this distribution. */ private String getAdditionalHelp() { - String additionalHelp = ""; + String additionalHelp; // If no analysis name is present, fill in extra help on the walkers. 
WalkerManager walkerManager = GATKEngine.getWalkerManager(); @@ -152,7 +151,7 @@ public class CommandLineGATK extends CommandLineExecutable { private static final int WALKER_INDENT = 3; private static final String FIELD_SEPARATOR = " "; - private String getWalkerHelp(Class walkerType) { + private String getWalkerHelp(Class walkerType) { // Construct a help string to output details on this walker. StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 588d56a19..d8d4a7861 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -40,19 +40,10 @@ import org.broadinstitute.sting.utils.help.SummaryTaglet; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: hanna - * Date: Mar 17, 2009 - * Time: 3:14:28 PM - * To change this template use File | Settings | File Templates. + * Plugin manager that also provides various utilities for inspecting Walkers. */ public class WalkerManager extends PluginManager { - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(WalkerManager.class); - /** * A collection of help text for walkers and their enclosing packages. */ @@ -92,7 +83,7 @@ public class WalkerManager extends PluginManager { public String getPackageDisplayName(String packageName) { // Try to find an override for the display name of this package. 
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME); - String displayName = null; + String displayName; if(helpText.containsKey(displayNameKey)) { displayName = helpText.getString(displayNameKey); } @@ -130,6 +121,15 @@ public class WalkerManager extends PluginManager { return helpText.getString(walkerSummary); } + /** + * Gets the summary help text associated with a given walker type. + * @param walker Walker for which to search for help text. + * @return Walker summary description, or "" if none exists. + */ + public String getWalkerSummaryText(Walker walker) { + return getWalkerSummaryText(walker.getClass()); + } + /** * Gets the descriptive help text associated with a given walker type. * @param walkerType Type of walker for which to search for help text. @@ -142,13 +142,34 @@ public class WalkerManager extends PluginManager { return helpText.getString(walkerDescription); } + /** + * Gets the descriptive help text associated with a given walker type. + * @param walker Walker for which to search for help text. + * @return Walker full description, or "" if none exists. + */ + public String getWalkerDescriptionText(Walker walker) { + return getWalkerDescriptionText(walker.getClass()); + } + /** * Retrieves the walker class given a walker name. * @param walkerName Name of the walker. * @return Class representing the walker. */ - public Class getWalkerClassByName(String walkerName) { - return (Class)pluginsByName.get(walkerName); + public Class getWalkerClassByName(String walkerName) { + return pluginsByName.get(walkerName); + } + + /** + * Gets the data source for the provided walker. + * @param walkerClass The class of the walker. + * @return Which type of data source to traverse over...reads or reference? 
+ */ + public static DataSource getWalkerDataSource(Class walkerClass) { + By byDataSource = walkerClass.getAnnotation(By.class); + if( byDataSource == null ) + throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName()); + return byDataSource.value(); } /** @@ -157,21 +178,38 @@ public class WalkerManager extends PluginManager { * @return Which type of data source to traverse over...reads or reference? */ public static DataSource getWalkerDataSource(Walker walker) { - Class walkerClass = walker.getClass(); - By byDataSource = walkerClass.getAnnotation(By.class); - if( byDataSource == null ) - throw new StingException("Unable to find By annotation for walker class " + walkerClass.getName()); - return byDataSource.value(); + return getWalkerDataSource(walker.getClass()); + } + + /** + * Get a list of RODs allowed by the walker. + * @param walkerClass Class of the walker to query. + * @return The list of allowed reference meta data. + */ + public static List getAllowsMetaData(Class walkerClass) { + Allows allowsDataSource = getWalkerAllowed(walkerClass); + if (allowsDataSource == null) + return Collections.emptyList(); + return Arrays.asList(allowsDataSource.referenceMetaData()); + } + + /** + * Get a list of RODs allowed by the walker. + * @param walker Walker to query. + * @return The list of allowed reference meta data. + */ + public static List getAllowsMetaData(Walker walker) { + return getAllowsMetaData(walker.getClass()); } /** * Determine whether the given walker supports the given data source. - * @param walker Walker to query. + * @param walkerClass Class of the walker to query. * @param dataSource Source to check for . * @return True if the walker forbids this data type. False otherwise. 
*/ - public static boolean isAllowed(Walker walker, DataSource dataSource) { - Allows allowsDataSource = getWalkerAllowed(walker); + public static boolean isAllowed(Class walkerClass, DataSource dataSource) { + Allows allowsDataSource = getWalkerAllowed(walkerClass); // Allows is less restrictive than requires. If an allows // clause is not specified, any kind of data is allowed. @@ -182,13 +220,23 @@ public class WalkerManager extends PluginManager { } /** - * Determine whether the given walker supports the given reference ordered data. + * Determine whether the given walker supports the given data source. * @param walker Walker to query. + * @param dataSource Source to check for . + * @return True if the walker forbids this data type. False otherwise. + */ + public static boolean isAllowed(Walker walker, DataSource dataSource) { + return isAllowed(walker.getClass(), dataSource); + } + + /** + * Determine whether the given walker supports the given reference ordered data. + * @param walkerClass Class of the walker to query. * @param rod Source to check. * @return True if the walker forbids this data type. False otherwise. */ - public static boolean isAllowed(Walker walker, RMDTrack rod) { - Allows allowsDataSource = getWalkerAllowed(walker); + public static boolean isAllowed(Class walkerClass, RMDTrack rod) { + Allows allowsDataSource = getWalkerAllowed(walkerClass); // Allows is less restrictive than requires. If an allows // clause is not specified, any kind of data is allowed. @@ -208,6 +256,27 @@ public class WalkerManager extends PluginManager { return false; } + /** + * Determine whether the given walker supports the given reference ordered data. + * @param walker Walker to query. + * @param rod Source to check. + * @return True if the walker forbids this data type. False otherwise. 
+ */ + public static boolean isAllowed(Walker walker, RMDTrack rod) { + return isAllowed(walker.getClass(), rod); + } + + /** + * Determine whether the given walker requires the given data source. + * @param walkerClass Class of the walker to query. + * @param dataSource Source to check for. + * @return True if the walker allows this data type. False otherwise. + */ + public static boolean isRequired(Class walkerClass, DataSource dataSource) { + Requires requiresDataSource = getWalkerRequirements(walkerClass); + return Arrays.asList(requiresDataSource.value()).contains(dataSource); + } + /** * Determine whether the given walker requires the given data source. * @param walker Walker to query. @@ -215,18 +284,26 @@ public class WalkerManager extends PluginManager { * @return True if the walker allows this data type. False otherwise. */ public static boolean isRequired(Walker walker, DataSource dataSource) { - Requires requiresDataSource = getWalkerRequirements(walker); - return Arrays.asList(requiresDataSource.value()).contains(dataSource); + return isRequired(walker.getClass(), dataSource); + } + + /** + * Get a list of RODs required by the walker. + * @param walkerClass Class of the walker to query. + * @return The list of required reference meta data. + */ + public static List getRequiredMetaData(Class walkerClass) { + Requires requiresDataSource = getWalkerRequirements(walkerClass); + return Arrays.asList(requiresDataSource.referenceMetaData()); } /** * Get a list of RODs required by the walker. * @param walker Walker to query. - * @return True if the walker allows this data type. False otherwise. + * @return The list of required reference meta data. 
*/ public static List getRequiredMetaData(Walker walker) { - Requires requiresDataSource = getWalkerRequirements(walker); - return Arrays.asList(requiresDataSource.referenceMetaData()); + return getRequiredMetaData(walker.getClass()); } /** @@ -238,6 +315,19 @@ public class WalkerManager extends PluginManager { return walkerType.isAnnotationPresent(Hidden.class); } + /** + * Extracts filters that the walker has requested be run on the dataset. + * @param walkerClass Class of the walker to inspect for filtering requests. + * @param filterManager Manages the creation of filters. + * @return A non-empty list of filters to apply to the reads. + */ + public static List getReadFilters(Class walkerClass, FilterManager filterManager) { + List filters = new ArrayList(); + for(Class filterType: getReadFilterTypes(walkerClass)) + filters.add(filterManager.createFilterByType(filterType)); + return filters; + } + /** * Extracts filters that the walker has requested be run on the dataset. * @param walker Walker to inspect for filtering requests. @@ -245,10 +335,28 @@ public class WalkerManager extends PluginManager { * @return A non-empty list of filters to apply to the reads. */ public static List getReadFilters(Walker walker, FilterManager filterManager) { - List filters = new ArrayList(); - for(Class filterType: getReadFilterTypes(walker)) - filters.add(filterManager.createFilterByType(filterType)); - return filters; + return getReadFilters(walker.getClass(), filterManager); + } + + /** + * Gets the type of downsampling method requested by the walker. If an alternative + * downsampling method is specified on the command-line, the command-line version will + * be used instead. + * @param walkerClass The class of the walker to interrogate. + * @return The downsampling method, as specified by the walker. Null if none exists. 
+ */ + public static DownsamplingMethod getDownsamplingMethod(Class walkerClass) { + DownsamplingMethod downsamplingMethod = null; + + if( walkerClass.isAnnotationPresent(Downsample.class) ) { + Downsample downsampleParameters = walkerClass.getAnnotation(Downsample.class); + DownsampleType type = downsampleParameters.by(); + Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null; + Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null; + downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction); + } + + return downsamplingMethod; } /** @@ -259,17 +367,7 @@ public class WalkerManager extends PluginManager { * @return The downsampling method, as specified by the walker. Null if none exists. */ public static DownsamplingMethod getDownsamplingMethod(Walker walker) { - DownsamplingMethod downsamplingMethod = null; - - if( walker.getClass().isAnnotationPresent(Downsample.class) ) { - Downsample downsampleParameters = walker.getClass().getAnnotation(Downsample.class); - DownsampleType type = downsampleParameters.by(); - Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null; - Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null; - downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction); - } - - return downsamplingMethod; + return getDownsamplingMethod(walker.getClass()); } /** @@ -293,26 +391,55 @@ public class WalkerManager extends PluginManager { /** * Utility to get the requires attribute from the walker. * Throws an exception if requirements are missing. - * @param walker Walker to query for required data. + * @param walkerClass Class of the walker to query for required data. * @return Required data attribute. 
*/ - private static Requires getWalkerRequirements(Walker walker) { - Class walkerClass = walker.getClass(); + private static Requires getWalkerRequirements(Class walkerClass) { Requires requiresDataSource = walkerClass.getAnnotation(Requires.class); if( requiresDataSource == null ) throw new StingException( "Unable to find data types required by walker class " + walkerClass.getName()); return requiresDataSource; } + /** + * Utility to get the requires attribute from the walker. + * Throws an exception if requirements are missing. + * @param walker Walker to query for required data. + * @return Required data attribute. + */ + private static Requires getWalkerRequirements(Walker walker) { + return getWalkerRequirements(walker.getClass()); + } + + /** + * Utility to get the forbidden attribute from the walker. + * @param walkerClass Class of the walker to query for required data. + * @return Required data attribute. Null if forbidden info isn't present. + */ + private static Allows getWalkerAllowed(Class walkerClass) { + Allows allowsDataSource = walkerClass.getAnnotation(Allows.class); + return allowsDataSource; + } + /** * Utility to get the forbidden attribute from the walker. * @param walker Walker to query for required data. * @return Required data attribute. Null if forbidden info isn't present. */ private static Allows getWalkerAllowed(Walker walker) { - Class walkerClass = walker.getClass(); - Allows allowsDataSource = walkerClass.getAnnotation(Allows.class); - return allowsDataSource; + return getWalkerAllowed(walker.getClass()); + } + + /** + * Gets the list of filtering classes specified as walker annotations. + * @param walkerClass Class of the walker to inspect. + * @return An array of types extending from SamRecordFilter. Will never be null. 
+ */ + @SuppressWarnings("unchecked") + public static Class[] getReadFilterTypes(Class walkerClass) { + if( !walkerClass.isAnnotationPresent(ReadFilters.class) ) + return new Class[0]; + return walkerClass.getAnnotation(ReadFilters.class).value(); } /** @@ -320,10 +447,7 @@ public class WalkerManager extends PluginManager { * @param walker The walker to inspect. * @return An array of types extending from SamRecordFilter. Will never be null. */ - private static Class[] getReadFilterTypes(Walker walker) { - Class walkerClass = walker.getClass(); - if( !walkerClass.isAnnotationPresent(ReadFilters.class) ) - return new Class[0]; - return walkerClass.getAnnotation(ReadFilters.class).value(); + public static Class[] getReadFilterTypes(Walker walker) { + return getReadFilterTypes(walker.getClass()); } } diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 90afae069..3be32ec49 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -29,6 +29,8 @@ import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.simpleframework.xml.*; @@ -64,7 +66,7 @@ public class GATKArgumentCollection { // parameters and their defaults @ElementList(required = false) - @Argument(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) + @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) public List 
samFiles = new ArrayList(); @Element(required = false) @@ -76,19 +78,19 @@ public class GATKArgumentCollection { public List readFilters = new ArrayList(); @ElementList(required = false) - @Argument(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) + @Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) public List intervals = null; @ElementList(required = false) - @Argument(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) + @Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) public List excludeIntervals = null; @Element(required = false) - @Argument(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) + @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) public File referenceFile = null; @ElementList(required = false) - @Argument(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form ,,", required = false) + @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form ,,", required = false) public ArrayList RODBindings = new ArrayList(); @Element(required = false) @@ -100,30 +102,30 @@ public class GATKArgumentCollection { public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION; @Element(required = false) - @Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false) + @Input(fullName = "DBSNP", shortName = 
"D", doc = "DBSNP file", required = false) public String DBSNPFile = null; @Element(required = false) - @Argument(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false) + @Input(fullName = "hapmap", shortName = "H", doc = "Hapmap file", required = false) public String HAPMAPFile = null; @Element(required = false) - @Argument(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false) + @Input(fullName = "hapmap_chip", shortName = "hc", doc = "Hapmap chip file", required = false) public String HAPMAPChipFile = null; /** An output file presented to the walker. */ @Element(required = false) - @Argument(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false) + @Output(fullName = "out", shortName = "o", doc = "An output file presented to the walker. Will overwrite contents if file exists.", required = false) public String outFileName = null; /** An error output file presented to the walker. */ @Element(required = false) - @Argument(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false) + @Output(fullName = "err", shortName = "e", doc = "An error output file presented to the walker. Will overwrite contents if file exists.", required = false) public String errFileName = null; /** A joint file for both 'normal' and error output presented to the walker. */ @Element(required = false) - @Argument(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", required = false) + @Output(fullName = "outerr", shortName = "oe", doc = "A joint file for 'normal' and error output presented to the walker. 
Will overwrite contents if file exists.", required = false) public String outErrFileName = null; @Element(required = false) diff --git a/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java b/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java index 44f9bdf76..bd899b80c 100644 --- a/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java @@ -30,6 +30,8 @@ import org.broadinstitute.sting.utils.classloader.PluginManager; import net.sf.picard.filter.SamRecordFilter; +import java.util.Collection; + /** * Manage filters and filter options. Any requests for basic filtering classes * should ultimately be made through this class. @@ -38,11 +40,6 @@ import net.sf.picard.filter.SamRecordFilter; * @version 0.1 */ public class FilterManager extends PluginManager { - /** - * our log, which we want to capture anything from this class - */ - private static Logger logger = Logger.getLogger(FilterManager.class); - public FilterManager() { super(SamRecordFilter.class,"filter","Filter"); } @@ -50,10 +47,14 @@ public class FilterManager extends PluginManager { /** * Instantiate a filter of the given type. Along the way, scream bloody murder if * the filter is not available. 
- * @param filterType - * @return + * @param filterType The type of the filter + * @return The filter */ public SamRecordFilter createFilterByType(Class filterType) { return this.createByName(getName(filterType)); } + + public Collection> getValues() { + return this.pluginsByName.values(); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java index ccbc16d37..2ac9c0314 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java @@ -158,10 +158,12 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor Annotation annotation = this.getArgumentAnnotation(source); return new ArgumentDefinition( annotation, + source.field.getType(), "variants_out", "varout", false, source.isMultiValued(), + getCollectionComponentType(source.field), source.isHidden(), null ); } @@ -173,13 +175,15 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor */ private ArgumentDefinition createGenotypeFormatArgumentDefinition(ArgumentSource source) { Annotation annotation = this.getArgumentAnnotation(source); - return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation), + return new ArgumentDefinition( ArgumentIOType.getIOType(annotation), + GenotypeWriterFactory.GENOTYPE_FORMAT.class, "variant_output_format", "vf", "Format to be used to represent variants; default is VCF", false, false, false, + null, source.isHidden(), null, null, diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 184da8757..7f9802220 100644 --- 
a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -97,10 +97,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor private ArgumentDefinition createBAMArgumentDefinition(ArgumentSource source) { Annotation annotation = this.getArgumentAnnotation(source); return new ArgumentDefinition( annotation, + source.field.getType(), DEFAULT_ARGUMENT_FULLNAME, DEFAULT_ARGUMENT_SHORTNAME, false, source.isMultiValued(), + getCollectionComponentType(source.field), source.isHidden(), null ); } @@ -112,13 +114,15 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor */ private ArgumentDefinition createBAMCompressionArgumentDefinition(ArgumentSource source) { Annotation annotation = this.getArgumentAnnotation(source); - return new ArgumentDefinition( ArgumentDefinition.getIOType(annotation), + return new ArgumentDefinition( ArgumentIOType.getIOType(annotation), + int.class, COMPRESSION_FULLNAME, COMPRESSION_SHORTNAME, "Compression level to use for writing BAM files", false, false, false, + null, source.isHidden(), null, null, diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java deleted file mode 100644 index 7f7b8da13..000000000 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to 
whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.iterators; - -import net.sf.picard.PicardException; -import net.sf.picard.sam.ReservedTagConstants; -import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.picard.util.PeekableIterator; -import net.sf.samtools.*; -import net.sf.samtools.util.CloseableIterator; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; - -import java.lang.reflect.Constructor; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.PriorityQueue; - -// Should replace picard class with the same name -class ComparableSamRecordIterator extends PeekableIterator implements Comparable, StingSAMIterator { - private Reads sourceInfo; - private final Comparator comparator; - private final SAMFileReader reader; - private final SamFileHeaderMerger mHeaderMerger; - - /** - * Constructs an iterator for iteration over the supplied SAM file that will be - * able to compare itself to other ComparableSAMRecordIterator instances using - * the supplied comparator for ordering SAMRecords. 
- * - * @param sam the SAM file to read records from - * @param comparator the Comparator to use to provide ordering fo SAMRecords - */ - public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, final Comparator comparator) { - super(sam.iterator()); - this.reader = sam; - this.comparator = comparator; - mHeaderMerger = samHeaderMerger; - } - - public ComparableSamRecordIterator(SamFileHeaderMerger samHeaderMerger, final SAMFileReader sam, Iterator iterator, final Comparator comparator) { - super(iterator); // use the provided iterator - this.reader = sam; - this.comparator = comparator; - mHeaderMerger = samHeaderMerger; - } - - public Reads getSourceInfo() { - if (sourceInfo == null) - throw new StingException("Unable to provide source info for the reads. Please upgrade to the new data sharding framework."); - return sourceInfo; - } - - /** - * Returns the reader from which this iterator was constructed. - * - * @return the SAMFileReader - */ - public SAMFileReader getReader() { - return reader; - } - - /** - * Compares this iterator to another comparable iterator based on the next record - * available in each iterator. If the two comparable iterators have different - * comparator types internally an exception is thrown. 
- * - * @param that another iterator to compare to - * - * @return a negative, 0 or positive number as described in the Comparator interface - */ - public int compareTo(final ComparableSamRecordIterator that) { - if (this.comparator.getClass() != that.comparator.getClass()) { - throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + - "have different orderings internally"); - } - - final SAMRecord record = this.peek(); - final SAMRecord record2 = that.peek(); - record.setHeader(mHeaderMerger.getMergedHeader()); - record2.setHeader(mHeaderMerger.getMergedHeader()); - int index, index2; - try { - index = mHeaderMerger.getMergedHeader().getSequenceIndex(record.getReferenceName()); - record.setReferenceIndex(index); - - index2 = mHeaderMerger.getMergedHeader().getSequenceIndex(record2.getReferenceName()); - record2.setReferenceIndex(index2); - } catch (Exception e) { - throw new StingException("MergingSamRecordIterator2: unable to correct the reference index for read " + record.getReadName() + " or record " + record2.getReadName(),e); - } - return comparator.compare(record, record2); - } - - public Iterator iterator() { - return this; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java index fb0fd3b25..19a6607fb 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManager.java @@ -31,26 +31,28 @@ import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.StingException; import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - +import java.util.*; /** - * @author aaron - *

- * Class RMDTrackManager - *

- * Find the available track builders, and create the requisite tracks from the command line. + * Find the available track builders, and create the requisite tracks from the command line. + * + * In Tribble RMD tracks have two classes: + * - a Feature that is the model/view for the data + * - a Codec that is the controller to generate the Feature. + * + * In this class, the track types are the Codecs. The track record types are the Features. */ public class RMDTrackManager extends PluginManager { // the input strings we use to create RODs from List inputs = new ArrayList(); // create an active mapping of builder instances, and a map of the name -> class for convenience - Map availableTracks; - Map availableTrackClasses; + /** the tracks that are available to us, associated with their builder */ + Map availableTrackBuilders; + /** the classes names, with their class description (think the Controller Codecs) */ + Map availableTrackTypes; + /** the available track record types (think the Model/View Features) */ + Map availableTrackRecordTypes; /** Create a new track plugin manager. */ public RMDTrackManager() { @@ -65,28 +67,56 @@ public class RMDTrackManager extends PluginManager { * @return a list of RMDTracks, one for each -B option */ public List getReferenceMetaDataSources(List triplets) { - if (availableTracks == null || availableTrackClasses == null) initialize(triplets); + initializeTrackTypes(); + initializeTriplets(triplets); // try and make the tracks given their requests - return createRequestedTrackObjects(availableTracks, availableTrackClasses); + return createRequestedTrackObjects(); + } + + + /** + * Returns a collection of track names that match the record type. 
+ * @param trackRecordType the record type specified in the @RMD annotation + * @return a collection of available track record type names that match the record type + */ + public Collection getTrackRecordTypeNames(Class trackRecordType) { + initializeTrackTypes(); + initializeTrackRecordTypes(); + Set names = new TreeSet(); + for (Map.Entry availableTrackRecordType: availableTrackRecordTypes.entrySet()) { + if (trackRecordType.isAssignableFrom(availableTrackRecordType.getValue())) + names.add(availableTrackRecordType.getKey()); + } + return names; } /** - * initialize our lists of tracks and builders + * initialize our lists of triplets * @param triplets the input to the GATK, as a list of strings passed in through the -B options */ - private void initialize(List triplets) { + private void initializeTriplets(List triplets) { + // NOTE: Method acts as a static. Once the inputs have been passed once they are locked in. + if (inputs.size() > 0 || triplets.size() == 0) + return; + for (String value: triplets) { String[] split = value.split(","); if (split.length != 3) throw new IllegalArgumentException(value + " is not a valid reference metadata track description"); inputs.add(new RMDTriplet(split[0], split[1], split[2])); } + } + + /** + * initialize our lists of tracks and builders + */ + private void initializeTrackTypes() { + if (availableTrackBuilders != null && availableTrackTypes != null) + return; // create an active mapping of builder instances, and a map of the name -> class for convenience - availableTracks = new HashMap(); - availableTrackClasses = new HashMap(); + availableTrackBuilders = new HashMap(); + availableTrackTypes = new HashMap(); createBuilderObjects(); - - } /** @@ -98,8 +128,24 @@ public class RMDTrackManager extends PluginManager { RMDTrackBuilder builder = this.createByName(builderName); Map mapping = builder.getAvailableTrackNamesAndTypes(); for (String name : mapping.keySet()) { - availableTracks.put(name.toUpperCase(), builder); - 
availableTrackClasses.put(name.toUpperCase(), mapping.get(name)); + availableTrackBuilders.put(name.toUpperCase(), builder); + availableTrackTypes.put(name.toUpperCase(), mapping.get(name)); + } + } + } + + /** + * initialize our list of track record types + */ + private void initializeTrackRecordTypes() { + if (availableTrackRecordTypes != null) + return; + + availableTrackRecordTypes = new HashMap(); + for (RMDTrackBuilder builder : availableTrackBuilders.values()) { + Map mapping = builder.getAvailableTrackNamesAndRecordTypes(); + for (String name : mapping.keySet()) { + availableTrackRecordTypes.put(name.toUpperCase(), mapping.get(name)); } } } @@ -107,22 +153,18 @@ public class RMDTrackManager extends PluginManager { /** * create the requested track objects * - * @param availableTracks the tracks that are available to us, associated with their builder - * @param availableTrackClasses the classes names, with their class description - * * @return a list of the tracks, one for each of the requested input tracks */ - private List createRequestedTrackObjects(Map availableTracks, Map availableTrackClasses) { + private List createRequestedTrackObjects() { // create of live instances of the tracks List tracks = new ArrayList(); // create instances of each of the requested types for (RMDTriplet trip : inputs) { - RMDTrackBuilder b = availableTracks.get(trip.getType().toUpperCase()); + RMDTrackBuilder b = availableTrackBuilders.get(trip.getType().toUpperCase()); if (b == null) throw new StingException("Unable to find track for " + trip.getType()); - tracks.add(b.createInstanceOfTrack(availableTrackClasses.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile()))); + tracks.add(b.createInstanceOfTrack(availableTrackTypes.get(trip.getType().toUpperCase()), trip.getName(), new File(trip.getFile()))); } return tracks; } } - diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java 
b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 01c971acb..17b778f45 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -44,6 +44,9 @@ public interface RMDTrackBuilder { /** @return a list of all available tracks types we currently have access to create */ public Map getAvailableTrackNamesAndTypes(); + /** @return a list of all available track record types we currently have access to create */ + public Map getAvailableTrackNamesAndRecordTypes(); + /** * create a RMDTrack of the specified type * diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index dc5de4e20..b04b2dad1 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -75,15 +75,19 @@ public class RODTrackBuilder implements RMDTrackBuilder { return new RODRMDTrack(targetClass, name, inputFile, createROD(name,targetClass,inputFile)); } - /** @return a map of all available tracks we currently have access to create */ + /** @return a map of all available track types we currently have access to create */ + @Override public Map getAvailableTrackNamesAndTypes() { - Map ret = new HashMap(); - for (String name : Types.keySet()) - ret.put(name, Types.get(name)); - return ret; + return new HashMap(Types); } -/** + /** @return a map of all available track record types we currently have access to create */ + @Override + public Map getAvailableTrackNamesAndRecordTypes() { + return new HashMap(Types); + } + + /** * Helpful function that parses a single triplet of and returns the corresponding ROD with * , of type that reads its input from . 
* diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index b5e18e069..955d5111e 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -35,7 +35,6 @@ import org.broad.tribble.index.IndexFactory; import org.broad.tribble.index.interval.IntervalIndexCreator; import org.broad.tribble.index.linear.LinearIndexCreator; import org.broad.tribble.source.BasicFeatureSource; -import org.broad.tribble.util.LittleEndianInputStream; import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -80,12 +79,20 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen super(FeatureCodec.class, "Codecs", "Codec"); } - /** @return a list of all available tracks we currently have access to create */ + /** @return a list of all available track types we currently have access to create */ @Override public Map getAvailableTrackNamesAndTypes() { + return new HashMap(this.pluginsByName); + } + + /** @return a list of all available track record types we currently have access to create */ + @Override + public Map getAvailableTrackNamesAndRecordTypes() { Map classes = new HashMap(); - for (String c : this.pluginsByName.keySet()) - classes.put(c, this.pluginsByName.get(c)); + for (String name: this.pluginsByName.keySet()) { + FeatureCodec codec = this.createByName(name); + classes.put(name, codec.getFeatureType()); + } return classes; } @@ -115,11 +122,12 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen /** * create a feature reader of the specified type * @param targetClass the target codec type + * @param name the target name * @param inputFile the 
input file to create the track from (of the codec type) * @return the FeatureReader instance */ public Pair createFeatureReader(Class targetClass, String name, File inputFile) { - Pair pair = null; + Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) pair = createBasicFeatureSourceNoAssumedIndex(targetClass, name, inputFile); else @@ -133,6 +141,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen * exists. * * @param targetClass the codec class type + * @param name the name of the track * @param inputFile the file to load * @return a feature reader implementation */ @@ -156,6 +165,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen /** * create a linear feature reader, where we create the index ahead of time * @param targetClass the target class + * @param name the name of the codec * @param inputFile the tribble file to parse * @return the input file as a FeatureReader */ @@ -264,7 +274,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen * @param indexFile the index file location * @param lock the locking object * @return the index object - * @throws IOException + * @throws IOException when unable to create the new index */ private static Index createNewIndex(File inputFile, FeatureCodec codec, boolean onDisk, File indexFile, FSLockWithShared lock) throws IOException { Index index = createIndexInMemory(inputFile, codec); @@ -296,7 +306,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen * @param inputFile the input file * @param codec the codec * @return a LinearIndex, given the file location - * @throws IOException + * @throws IOException when unable to create the index in memory */ private static Index createIndexInMemory(File inputFile, FeatureCodec codec) throws IOException { // this can take a while, let them know what we're doing @@ -317,7 +327,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen * @param contigList the contig list, in coordinate 
order, this is allowed to be null * @return a SAMSequenceDictionary, WITHOUT contig sizes */ - private static final SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet contigList) { + private static SAMSequenceDictionary sequenceSetToDictionary(LinkedHashSet contigList) { SAMSequenceDictionary dict = new SAMSequenceDictionary(); if (contigList == null) return dict; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 83e363b20..acbc708bb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -487,7 +487,7 @@ public class ClipReadsWalker extends ReadWalker { /** an optional argument to dump the reads out to a BAM file */ - @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) + @Output(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) SAMFileWriter outputBamFile = null; @Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false) String readGroup = null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java b/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java index 20479a05c..306b3b1d3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/RMD.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.Feature; + import java.lang.annotation.Documented; import java.lang.annotation.Inherited; import java.lang.annotation.Retention; @@ -25,5 +27,5 @@ import java.lang.annotation.RetentionPolicy; @Retention(RetentionPolicy.RUNTIME) public @interface RMD { String name(); - Class type(); + Class type() default 
Feature.class; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 1480a9ee9..2d72fb439 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; @@ -85,7 +86,7 @@ public class CovariateCounterWalker extends LocusWalker getAnnotationIOClass() { return argumentDefinition.ioType.annotationClass; } + @Override protected String getDoc() { return escape(argumentDefinition.doc); } + @Override protected String getFullName() { return escape(argumentDefinition.fullName); } + @Override protected String getShortName() { return escape(argumentDefinition.shortName); } + @Override protected boolean isRequired() { return argumentDefinition.required; } + @Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); } + @Override protected String getValidation() { return escape(argumentDefinition.validation); } + + protected static final String REQUIRED_TEMPLATE = " + \" %1$s \" + %2$s.format(%3$s)"; + protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=%2$s)"; + protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=%2$s)"; + protected static final String FLAG_TEMPLATE = " + (if (%3$s) \" %1$s \" else \"\")"; + + public final String getCommandLineAddition() { + return 
String.format(getCommandLineTemplate(), getCommandLineParam(), getCommandLineFormat(), getFieldName()); + } + + protected String getCommandLineParam() { + return (argumentDefinition.shortName != null) + ? "-" + argumentDefinition.shortName + : "--" + argumentDefinition.fullName; + } + + protected String getCommandLineFormat() { + return "\"%s\""; + } + + @Override + protected String getScatterGatherAnnotation() { + return ""; + } + + protected String getCommandLineTemplate() { + return isRequired() ? REQUIRED_TEMPLATE : OPTIONAL_TEMPLATE; + } + + public static List getArgumentFields(Class classType) { + List argumentFields = new ArrayList(); + for (ArgumentSource argumentSource: ParsingEngine.extractArgumentSources(classType)) + for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions()) + argumentFields.addAll(getArgumentFields(argumentDefinition)); + return argumentFields; + } + + private static final List intervalFields = Arrays.asList("intervals", "excludeIntervals", "targetIntervals"); + + private static List getArgumentFields(ArgumentDefinition argumentDefinition) { + if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { + boolean scatter = "intervals".equals(argumentDefinition.fullName); + return Arrays.asList( + new IntervalFileArgumentField(argumentDefinition, scatter), + new IntervalStringArgumentField(argumentDefinition)); + + // ROD Bindings are set by the RodBindField + } else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { + // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets + return Collections.singletonList(new RodBindArgumentField(argumentDefinition, argumentDefinition.required)); + //return Collections.emptyList(); + + } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { + 
return Arrays.asList(new InputArgumentField(argumentDefinition), new IndexFilesField()); + + } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) { + return Collections.singletonList(new InputArgumentField(argumentDefinition)); + + } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) { + return Collections.singletonList(new OutputArgumentField(argumentDefinition)); + + } else if (argumentDefinition.isFlag) { + return Collections.singletonList(new FlagArgumentField(argumentDefinition)); + + } else if (argumentDefinition.isMultiValued) { + return Collections.singletonList(new MultiValuedArgumentField(argumentDefinition)); + + } else if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) { + boolean useFormat = useFormatter(argumentDefinition.argumentType); + List fields = new ArrayList(); + ArgumentField field = new OptionedArgumentField(argumentDefinition, useFormat); + fields.add(field); + if (useFormat) fields.add(new FormatterArgumentField(field)); + return fields; + + } else { + boolean useFormat = useFormatter(argumentDefinition.argumentType); + List fields = new ArrayList(); + ArgumentField field = new DefaultArgumentField(argumentDefinition, useFormat); + fields.add(field); + if (useFormat) fields.add(new FormatterArgumentField(field)); + return fields; + + } + } + + // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) + // Change intervals to an input file, and optionally scatter it. 
+ private static class IntervalFileArgumentField extends InputArgumentField { + private final boolean scatter; + public IntervalFileArgumentField(ArgumentDefinition argumentDefinition, boolean scatter) { + super(argumentDefinition); + this.scatter = scatter; + } + + @Override protected boolean isMultiValued() { return !this.scatter && super.isMultiValued(); } + @Override public boolean isScatter() { return this.scatter; } + @Override protected String getScatterGatherAnnotation() { + return scatter ? String.format("@Scatter(classOf[IntervalScatterFunction])%n") : super.getScatterGatherAnnotation(); + } + + @Override + protected String getExclusiveOf() { + StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf()); + if (exclusiveOf.length() > 0) + exclusiveOf.append(","); + exclusiveOf.append(escape(argumentDefinition.fullName)).append("String"); + return exclusiveOf.toString(); + } + } + + // if (intervalFields.contains(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) + // Change intervals to a string but as an argument. 
+ private static class IntervalStringArgumentField extends ArgumentDefinitionField { + public IntervalStringArgumentField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + + @SuppressWarnings("unchecked") + @Override protected Class getAnnotationIOClass() { return Argument.class; } + @Override protected Class getInnerType() { return String.class; } + @Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; } + @Override protected String getFullName() { return super.getFullName() + "String"; } + @Override protected String getFieldType() { return "List[String]"; } + @Override protected String getDefaultValue() { return "Nil"; } + @Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; } + + @Override + protected String getExclusiveOf() { + StringBuilder exclusiveOf = new StringBuilder(super.getExclusiveOf()); + if (exclusiveOf.length() > 0) + exclusiveOf.append(","); + exclusiveOf.append(escape(argumentDefinition.fullName)); + return exclusiveOf.toString(); + } + } + + // if (argumentDefinition.ioType == ArgumentIOType.INPUT) + // Map all inputs to files. Handles multi valued files. + private static class InputArgumentField extends ArgumentDefinitionField { + public InputArgumentField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + + @Override protected Class getInnerType() { return File.class; } + @Override protected String getFieldType() { return String.format(isMultiValued() ? "List[%s]" : "%s", getRawFieldType()); } + @Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; } + @Override protected String getCommandLineTemplate() { + return isMultiValued() ? 
REPEAT_TEMPLATE : super.getCommandLineTemplate(); + } + + protected String getRawFieldType() { return "File"; } + protected boolean isMultiValued() { return argumentDefinition.isMultiValued; } + } + + // if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) + // Map all outputs to files. + private static class OutputArgumentField extends ArgumentDefinitionField { + public OutputArgumentField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + + @Override protected Class getInnerType() { return File.class; } + @Override protected String getFieldType() { return "File"; } + @Override protected String getDefaultValue() { return "_"; } + + @Override public boolean isGather() { return true; } + @Override protected String getScatterGatherAnnotation() { + return String.format(SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType) + ? "@Gather(classOf[BamGatherFunction])%n" + : "@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])%n"); + } + } + + // if (argumentDefinition.isFlag) + // Booleans should be set on the commandline only if they are true. + private static class FlagArgumentField extends ArgumentDefinitionField { + public FlagArgumentField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + + @Override protected Class getInnerType() { return boolean.class; } + @Override protected String getFieldType() { return "Boolean"; } + @Override protected String getDefaultValue() { return "_"; } + @Override protected String getCommandLineTemplate() { return FLAG_TEMPLATE; } + } + + // if (argumentDefinition.isMultiValued) + // Multi value arguments are mapped to List[] and use repeat. 
+ private static class MultiValuedArgumentField extends ArgumentDefinitionField { + public MultiValuedArgumentField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + + @Override protected Class getInnerType() { return mapType(argumentDefinition.componentType); } + @Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); } + @Override protected String getDefaultValue() { return "Nil"; } + @Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; } + } + + // if (!argumentDefinition.required && useOption(argumentDefinition.argumentType)) + // Any optional arguments that are primitives / enums are wrapped in options. + private static class OptionedArgumentField extends ArgumentDefinitionField { + private final boolean useFormatter; + + public OptionedArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) { + super(argumentDefinition); + this.useFormatter = useFormatter; + } + + @Override protected Class getInnerType() { return mapType(argumentDefinition.argumentType); } + @Override protected String getFieldType() { return String.format("Option[%s]", getType(getInnerType())); } + @Override protected String getDefaultValue() { return "None"; } + @Override protected String getCommandLineTemplate() { return OPTIONAL_TEMPLATE; } + @Override protected String getCommandLineFormat() { + return this.useFormatter ? 
getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat(); + } + } + + // Any other @Arguments + private static class DefaultArgumentField extends ArgumentDefinitionField { + private final boolean useFormatter; + + public DefaultArgumentField(ArgumentDefinition argumentDefinition, boolean useFormatter) { + super(argumentDefinition); + this.useFormatter = useFormatter; + } + + @Override protected Class getInnerType() { return mapType(argumentDefinition.argumentType); } + @Override protected String getFieldType() { return getType(getInnerType()); } + @Override protected String getDefaultValue() { return "_"; } + @Override protected String getCommandLineFormat() { + return this.useFormatter ? getFieldName(this.getRawFieldName() + "Format") : super.getCommandLineFormat(); + } + } + + /** + * The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file. + */ + public static class RodBindArgumentField extends InputArgumentField { + private boolean isRequired; + public RodBindArgumentField(ArgumentDefinition argumentDefinition, boolean isRequired) { + super(argumentDefinition); + this.isRequired = isRequired; + } + + @Override protected boolean isRequired() { return this.isRequired; } + @Override protected String getRawFieldType() { return "RodBind"; } + } + + /** + * Adds optional inputs for the indexes of any bams or sams added to this function. 
+ */ + private static class IndexFilesField extends ArgumentField { + @Override protected Class getAnnotationIOClass() { return Input.class; } + @Override public String getCommandLineAddition() { return ""; } + @Override protected String getDoc() { return "Dependencies on any index files for any bams or sams added to input_files"; } + @Override protected String getFullName() { return "index_files"; } + @Override protected boolean isRequired() { return false; } + @Override protected String getFieldType() { return "List[File]"; } + @Override protected String getDefaultValue() { return "Nil"; } + @Override protected Class getInnerType() { return File.class; } + @Override protected String getRawFieldName() { return "index_files"; } + @Override protected String getFreezeFields() { + return String.format( + "index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" + + "index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n"); + } + } + + private static class FormatterArgumentField extends ArgumentField { + private final ArgumentField argumentField; + public FormatterArgumentField(ArgumentField argumentField) { + this.argumentField = argumentField; + } + @Override protected Class getAnnotationIOClass() { return Argument.class; } + @Override public String getCommandLineAddition() { return ""; } + @Override protected String getDoc() { return "Format string for " + this.argumentField.getFullName(); } + @Override protected String getFullName() { return this.argumentField.getFullName() + "Format"; } + @Override protected boolean isRequired() { return false; } + @Override protected String getFieldType() { return "String"; } + @Override protected String getDefaultValue() { return "\"%s\""; } + @Override protected Class getInnerType() { return String.class; } + @Override protected String getRawFieldName() { return 
this.argumentField.getRawFieldName() + "Format"; } + } +} diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java new file mode 100644 index 000000000..ef7f6f729 --- /dev/null +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.queue.extensions.gatk; + +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMFileWriter; +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; +import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; + +import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.util.*; + +public abstract class ArgumentField { + + public Collection getImportStatements() { + List imports = new ArrayList(); + for (Class importClass: getImportClasses()) { + if (!isBuiltIn(importClass)) + imports.add("import " + importClass.getName().replace("$", ".")); + } + return imports; + } + + /** + * Returns true if a class is built in and doesn't need to be imported. + * @param argType The class to check. + * @return true if the class is built in and doesn't need to be imported + */ + private static boolean isBuiltIn(Class argType) { + return argType.isPrimitive() || argType == String.class || Number.class.isAssignableFrom(argType); + } + + /** @return Scala code defining the argument and its annotation. */ + public final String getArgumentAddition() { + return String.format("%n" + + "/** %s */%n" + + "@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" + + "%svar %s: %s = %s%n", + getDoc(), + getAnnotationIOClass().getSimpleName(), + getFullName(), + getShortName(), + getDoc(), + isRequired(), + getExclusiveOf(), + getValidation(), + getScatterGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue()); + } + + /** @return Scala code to append to the command line. */ + public abstract String getCommandLineAddition(); + + // Argument Annotation + + /** @return Documentation for the annotation. */ + protected abstract String getDoc(); + + /** @return Annotation class of the annotation. 
*/ + protected abstract Class getAnnotationIOClass(); + + /** @return Full name for the annotation. */ + protected abstract String getFullName(); + + /** @return Short name for the annotation or "". */ + protected String getShortName() { return ""; } + + /** @return true if the argument is required. */ + protected abstract boolean isRequired(); + + /** @return A comma separated list of arguments that may be substituted for this field. */ + protected String getExclusiveOf() { return ""; } + + /** @return A validation string for the argument. */ + protected String getValidation() { return ""; } + + /** @return A scatter or gather annotation with a line feed, or "". */ + protected String getScatterGatherAnnotation() { return ""; } + + // Scala + + /** @return The scala field type. */ + protected abstract String getFieldType(); + + /** @return The scala default value. */ + protected abstract String getDefaultValue(); + + /** @return The class of the field, or the component type if the scala field is a collection. */ + protected abstract Class getInnerType(); + + /** @return A custom command for overriding freeze. */ + protected String getFreezeFields() { return ""; } + + @SuppressWarnings("unchecked") + protected Collection> getImportClasses() { + return Arrays.asList(this.getInnerType(), getAnnotationIOClass()); + } + + /** @return True if this field uses @Scatter. */ + public boolean isScatter() { return false; } + + /** @return True if this field uses @Gather. */ + public boolean isGather() { return false; } + + /** @return The raw field name, which will be checked against scala built-in types. */ + protected abstract String getRawFieldName(); + /** @return The field name checked against reserved words. */ + protected final String getFieldName() { + return getFieldName(this.getRawFieldName()); + } + + /** + * @param rawFieldName The raw field name + * @return The field name checked against reserved words. 
+ */ + protected static String getFieldName(String rawFieldName) { + String fieldName = rawFieldName; + if (!StringUtils.isAlpha(fieldName.substring(0,1))) + fieldName = "_" + fieldName; + if (isReserved(fieldName) || fieldName.contains("-")) + fieldName = "`" + fieldName + "`"; + return fieldName; + } + + /** via http://www.scala-lang.org/sites/default/files/linuxsoft_archives/docu/files/ScalaReference.pdf */ + private static final List reservedWords = Arrays.asList( + "abstract", "case", "catch", "class", "def", + "do", "else", "extends", "false", "final", + "finally", "for", "forSome", "if", "implicit", + "import", "lazy", "match", "new", "null", + "object", "override", "package", "private", "protected", + "return", "sealed", "super", "this", "throw", + "trait", "try", "true", "type", "val", + "var", "while", "with", "yield"); + + protected static boolean isReserved(String word) { + return reservedWords.contains(word); + } + + /** + * On primitive types returns the capitalized scala type. + * @param argType The class whose scala type name is wanted. + * @return the simple name of the class, capitalized for primitives, with "Integer" mapped to "Int". + */ + protected static String getType(Class argType) { + String type = argType.getSimpleName(); + + if (argType.isPrimitive()) + type = StringUtils.capitalize(type); + + if ("Integer".equals(type)) + type = "Int"; + + return type; + } + + protected static String escape(String string) { + return (string == null) ? "" : StringEscapeUtils.escapeJava(string); + } + + /** + * @param argType The class to check for options. + * @return true if option should be used. + */ + protected static boolean useOption(Class argType) { + return (argType.isPrimitive()) || (Number.class.isAssignableFrom(argType)) || (argType.isEnum()); + } + + /** + * @param argType The class to check for a formatter. + * @return true if the class is a double or float (boxed or primitive), i.e. a formatter should be used. 
+ */ + protected static boolean useFormatter(Class argType) { + return (argType.equals(Double.class) || argType.equals(Double.TYPE) || + argType.equals(Float.class) || argType.equals(Float.TYPE)); + } + + // TODO: Use an annotation, type descriptor, anything but hardcoding these lists! + + protected static Class mapType(Class clazz) { + if (InputStream.class.isAssignableFrom(clazz)) return File.class; + if (SAMFileReader.class.isAssignableFrom(clazz)) return File.class; + if (OutputStream.class.isAssignableFrom(clazz)) return File.class; + if (GenotypeWriter.class.isAssignableFrom(clazz)) return File.class; + if (SAMFileWriter.class.isAssignableFrom(clazz)) return File.class; + if (PlatformUnitFilterHelper.class.isAssignableFrom(clazz)) return String.class; + return clazz; + } +} diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java new file mode 100644 index 000000000..cefca44da --- /dev/null +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/CommandLineProgramManager.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.gatk; + +import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.utils.classloader.PluginManager; + +import java.util.Collection; + +/** + * Finds all command line programs. + */ +public class CommandLineProgramManager extends PluginManager { + public CommandLineProgramManager() { + super(CommandLineProgram.class, "CommandLineProgram", "CLP"); + } + + public Collection> getValues() { + return this.pluginsByName.values(); + } +} diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java new file mode 100644 index 000000000..207da8a1f --- /dev/null +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.gatk; + +import net.sf.picard.filter.SamRecordFilter; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.filters.FilterManager; +import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.Map.Entry; + +/** + * Generates Queue modules that can be used to run GATK walkers. + * + * ArgumentCollections are flattened into a single module. 
+ */ +public class GATKExtensionsGenerator extends CommandLineProgram { + private static final Logger logger = Logger.getRootLogger(); + public static final String GATK_EXTENSIONS_PACKAGE_NAME = "org.broadinstitute.sting.queue.extensions.gatk"; + private static final String COMMANDLINE_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME; + private static final String FILTER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME; + private static final String WALKER_PACKAGE_NAME = GATK_EXTENSIONS_PACKAGE_NAME; + + @Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true) + public File outputDirectory; + + CommandLineProgramManager clpManager = new CommandLineProgramManager(); + GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine(); + WalkerManager walkerManager = new WalkerManager(); + FilterManager filterManager = new FilterManager(); + RMDTrackManager rmdTrackManager = new RMDTrackManager(); + + /** + * Required main method implementation. + * @param argv Command-line arguments. 
+ */ + public static void main(String[] argv) { + try { + start(new GATKExtensionsGenerator(), argv); + System.exit(CommandLineProgram.result); + } catch (Exception e) { + exitSystemWithError(e); + } + } + + @Override + protected Collection getArgumentTypeDescriptors() { + List typeDescriptors = new ArrayList(); + typeDescriptors.add(new GenotypeWriterArgumentTypeDescriptor(GATKEngine)); + typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine)); + typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine)); + typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine)); + return typeDescriptors; + } + + @Override + protected int execute() { + try { + if (!outputDirectory.isDirectory() && !outputDirectory.mkdirs()) + throw new StingException("Unable to create output directory: " + outputDirectory); + + for (Class clp: clpManager.getValues()) { + + if (!isGatkProgram(clp)) + continue; + + String clpClassName = clpManager.getName(clp); + + writeClass("org.broadinstitute.sting.queue.function.JarCommandLineFunction", COMMANDLINE_PACKAGE_NAME, clpClassName, + "", ArgumentDefinitionField.getArgumentFields(clp)); + + if (clp == CommandLineGATK.class) { + for (Entry>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) { + for(Class walkerType: walkersByPackage.getValue()) { + String walkerName = walkerManager.getName(walkerType); + List argumentFields = new ArrayList(); + + argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(walkerType)); + argumentFields.addAll(RodBindField.getRodArguments(walkerType, rmdTrackManager)); + argumentFields.addAll(ReadFilterField.getFilterArguments(walkerType)); + + writeClass(COMMANDLINE_PACKAGE_NAME + "." 
+ clpClassName, WALKER_PACKAGE_NAME, + walkerName, String.format("analysis_type = \"%s\"%n%n", walkerName), argumentFields); + } + } + } + } + + for (Class filter: filterManager.getValues()) { + String filterName = filterManager.getName(filter); + writeFilter(FILTER_PACKAGE_NAME, filterName, ArgumentDefinitionField.getArgumentFields(filter)); + } + + return 0; + } catch (IOException exception) { + logger.error("Error generating queue output.", exception); + return 1; + } + } + + private static final List gatkPackages = Arrays.asList( + "org.broadinstitute.sting.gatk", + "org.broadinstitute.sting.analyzecovariates"); + private boolean isGatkProgram(Class clazz) { + if (clazz.getPackage() == null) + return false; + String classPackage = clazz.getPackage().getName(); + for (String gatkPackage : gatkPackages) + if (classPackage.startsWith(gatkPackage)) + return true; + return false; + } + + private void writeClass(String baseClass, String packageName, String className, String constructor, + List argumentFields) throws IOException { + String content = getContent(CLASS_TEMPLATE, baseClass, packageName, className, constructor, "", argumentFields); + writeFile(packageName + "." + className, content); + } + + private void writeFilter(String packageName, String className, List argumentFields) throws IOException { + String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction", + packageName, className, "", String.format(" + \" -read_filter %s\"", className), argumentFields); + writeFile(packageName + "." 
+ className, content); + } + + private void writeFile(String fullClassName, String content) throws IOException { + File outputFile = new File(outputDirectory, fullClassName.replace(".", "/") + ".scala"); + if (outputFile.exists()) { + String existingContent = FileUtils.readFileToString(outputFile); + if (StringUtils.equals(content, existingContent)) + return; + } + FileUtils.writeStringToFile(outputFile, content); + } + + private static String getContent(String scalaTemplate, String baseClass, String packageName, String className, + String constructor, String commandLinePrefix, List argumentFields) { + StringBuilder arguments = new StringBuilder(); + StringBuilder commandLine = new StringBuilder(commandLinePrefix); + + Set importSet = new HashSet(); + boolean isScatter = false; + boolean isGather = false; + List freezeFields = new ArrayList(); + for(ArgumentField argumentField: argumentFields) { + arguments.append(argumentField.getArgumentAddition()); + commandLine.append(argumentField.getCommandLineAddition()); + importSet.addAll(argumentField.getImportStatements()); + freezeFields.add(argumentField.getFreezeFields()); + + isScatter |= argumentField.isScatter(); + isGather |= argumentField.isGather(); + } + + if (isScatter) { + importSet.add("import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction"); + importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Scatter"); + baseClass += " with ScatterGatherableFunction"; + } + if (isGather) + importSet.add("import org.broadinstitute.sting.queue.function.scattergather.Gather"); + + // Sort the imports so that they are always in the same order. 
+ List sortedImports = new ArrayList(importSet); + Collections.sort(sortedImports); + + StringBuffer freezeFieldOverride = new StringBuffer(); + for (String freezeField: freezeFields) + freezeFieldOverride.append(freezeField); + if (freezeFieldOverride.length() > 0) { + freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n")); + freezeFieldOverride.append(String.format("}%n%n")); + } + + // see CLASS_TEMPLATE and TRAIT_TEMPLATE below + return String.format(scalaTemplate, packageName, StringUtils.join(sortedImports, NEWLINE), + className, baseClass, constructor, arguments, freezeFieldOverride, commandLine); + } + + private static final String NEWLINE = String.format("%n"); + + private static final String CLASS_TEMPLATE = "package %s%n"+ + "%s%n" + + "class %s extends %s {%n" + + "%s%s%n" + + "%soverride def commandLine = super.commandLine%s%n" + + "}%n"; + + private static final String TRAIT_TEMPLATE = "package %s%n"+ + "%s%n" + + "trait %s extends %s {%n" + + "%s%s%n" + + "%sabstract override def commandLine = super.commandLine%s%n" + + "}%n"; +} diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java new file mode 100644 index 000000000..23eacceae --- /dev/null +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ReadFilterField.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice 
and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.gatk; + +import net.sf.picard.filter.SamRecordFilter; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.walkers.Walker; + +import java.util.ArrayList; +import java.util.List; + +public class ReadFilterField { + /** + * Adds the argument fields of each read filter listed on the walker. + * @param walkerClass the class of the walker + * @return the list of argument fields + */ + public static List getFilterArguments(Class walkerClass) { + List argumentFields = new ArrayList(); + for(Class filter: WalkerManager.getReadFilterTypes(walkerClass)) + argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(filter)); + return argumentFields; + } +} diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java new file mode 100644 index 000000000..7ae929b93 --- /dev/null +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, 
merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.gatk; + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackManager; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Walker; + +import java.io.File; +import java.lang.annotation.Annotation; +import java.util.ArrayList; +import java.util.List; + +/** + * Allows user to specify the rod file but locks in the track name and the track type. 
+ */ +public class RodBindField extends ArgumentField { + public static final String ROD_BIND_FIELD = "rodBind"; + + private final String trackName; + private final String typeName; + private final List relatedFields; + private final boolean isRequired; + + public RodBindField(String trackName, String typeName, List relatedFields, boolean isRequired) { + this.trackName = trackName; + this.typeName = typeName; + this.relatedFields = relatedFields; + this.isRequired = isRequired; + } + + @SuppressWarnings("unchecked") + @Override protected Class getAnnotationIOClass() { return Input.class; } + @Override protected Class getInnerType() { return File.class; } + @Override protected String getFullName() { return escape(getRawFieldName()); } + @Override protected String getFieldType() { return "File"; } + @Override protected String getDefaultValue() { return "_"; } + @Override protected String getRawFieldName() { return this.trackName + this.typeName; } + @Override protected String getDoc() { return escape(this.typeName + " " + this.trackName); } + @Override protected boolean isRequired() { return this.isRequired; } + + @Override public String getCommandLineAddition() { + return String.format(this.useOption() + ? " + optional(\" -B %s,%s,\", %s)" + : " + \" -B %s,%s,\" + %s", + this.trackName, this.typeName, getFieldName()); + } + + private boolean useOption() { + return !this.isRequired || (relatedFields.size() > 1); + } + + @Override protected String getExclusiveOf() { + StringBuilder exclusiveOf = new StringBuilder(); + // TODO: Stop allowing the generic "rodBind" triplets to satisfy the requirement after @Requires are fixed. 
+ if (this.isRequired) + exclusiveOf.append(ROD_BIND_FIELD); + for (RodBindField relatedField: relatedFields) + if (relatedField != this) { + if (exclusiveOf.length() > 0) + exclusiveOf.append(","); + exclusiveOf.append(relatedField.getFieldName()); + } + return exclusiveOf.toString(); + } + + public static List getRodArguments(Class walkerClass, RMDTrackManager rmdTrackManager) { + List argumentFields = new ArrayList(); + + List requires = WalkerManager.getRequiredMetaData(walkerClass); + List allows = WalkerManager.getAllowsMetaData(walkerClass); + + for (RMD required: requires) { + List fields = new ArrayList(); + String trackName = required.name(); + if ("*".equals(trackName)) { + // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers + //fields.add(new RodBindArgumentField(argumentDefinition, true)); + } else { + for (String typeName: rmdTrackManager.getTrackRecordTypeNames(required.type())) + fields.add(new RodBindField(trackName, typeName, fields, true)); + } + argumentFields.addAll(fields); + } + + for (RMD allowed: allows) { + List fields = new ArrayList(); + String trackName = allowed.name(); + if ("*".equals(trackName)) { + // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers + //fields.add(new RodBindArgumentField(argumentDefinition, false)); + } else { + for (String typeName: rmdTrackManager.getTrackRecordTypeNames(allowed.type())) + fields.add(new RodBindField(trackName, typeName, fields, false)); // allowed (not required) tracks must stay optional + } + argumentFields.addAll(fields); + } + + return argumentFields; + } +} diff --git a/scala/qscript/UnifiedGenotyperExample.scala b/scala/qscript/UnifiedGenotyperExample.scala new file mode 100644 index 000000000..714d4a4fd --- /dev/null +++ b/scala/qscript/UnifiedGenotyperExample.scala @@ -0,0 +1,63 @@ +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript + +class UnifiedGenotyperExample extends QScript { + qscript => + + 
@Input(doc="gatk jar file") + var gatkJar: File = _ + + @Input(doc="bam files", shortName="I") + var bamFiles: List[File] = Nil + + @Input(doc="interval list", shortName="L") + var intervals: File = _ + + @Input(doc="referenceFile", shortName="R") + var referenceFile: File = _ + + @Argument(doc="filter names", shortName="filter") + var filterNames: List[String] = Nil + + @Argument(doc="filter expressions", shortName="filterExpression") + var filterExpressions: List[String] = Nil + + @Argument(doc="job queue", shortName="queue", required=false) + var jobQueue = "broad" + + trait UnifiedGenotyperArguments extends CommandLineGATK { + this.jobQueue = qscript.jobQueue + this.jarFile = qscript.gatkJar + this.intervals = qscript.intervals + this.reference_sequence = qscript.referenceFile + } + + def script = { + for (bam <- bamFiles) { + val ug = new UnifiedGenotyper with UnifiedGenotyperArguments + val vf = new VariantFiltration with UnifiedGenotyperArguments + val ve = new VariantEval with UnifiedGenotyperArguments + + val pr = new PrintReads with UnifiedGenotyperArguments + pr.input_file :+= bam + pr.outputBamFile = swapExt(bam, "bam", "new.bam") + pr.scatterCount = 2 + pr.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = new File("/path/to/jar") } + add(pr) + + // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh. 
+ ug.scatterCount = 3 + ug.input_file :+= bam + ug.out = swapExt(bam, "bam", "unfiltered.vcf") + + vf.rodBind :+= RodBind("vcf", "VCF", ug.out) + vf.out = swapExt(bam, "bam", "filtered.vcf") + + ve.rodBind :+= RodBind("vcf", "VCF", vf.out) + ve.out = swapExt(bam, "bam", "eval") + + //add(ug, vf, ve) + } + + } +} diff --git a/scala/qscript/depristo/1kg_table1.scala b/scala/qscript/depristo/1kg_table1.scala index 89c989678..9434137a1 100755 --- a/scala/qscript/depristo/1kg_table1.scala +++ b/scala/qscript/depristo/1kg_table1.scala @@ -1,8 +1,16 @@ -import org.broadinstitute.sting.queue.QScript._ -// Other imports can be added here +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.{GenotypeMergeType, VariantMergeType} +import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory.VE2TemplateType +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript -val UNIVERSAL_GATK_ARGS = " -l INFO " // -L 1 -val unusedArgs = setArgs(args) +class Onekg_table1 extends QScript { + @Argument(doc="stage") + var stage: String = _ + + @Argument(doc="gatkJarFile") + var gatkJarFile: File = _ + +trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile } // -L 1 class Target(project: String, snpVCF: String, indelVCF: String, calledGenome: Double, targetGenome: Double, pop: String, pilot : String, bam: String = null) { def reportFile: String = List(pop, pilot, "report").mkString(".") @@ -40,9 +48,9 @@ for ( (pop: String, called) <- p2Targets ) targets ::= new Target("SRP000032", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/trio/snps/" + pop + ".trio.2010_03.genotypes.vcf.gz", "v1/dindel-v2/"+pop+".trio.2010_06.indel.genotypes.vcf", called, 2.85e9, pop, "pilot2") // pilot 3 -for (POP <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) { - val indels = if ( POP != "LWK" ) 
"/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+POP+".exon.2010_06.genotypes.vcf.gz" else null - targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + POP + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, POP, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(POP)) +for (pop <- List("CEU", "CHB", "CHD", "JPT", "LWK", "TSI", "YRI")) { + val indels = if ( pop != "LWK" ) "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/indel/"+pop+".exon.2010_06.genotypes.vcf.gz" else null + targets ::= new Target("SRP000033", "/humgen/gsa-hpprojects/1kg/releases/pilot_paper_calls/exon/snps/" + pop + ".exon.2010_03.genotypes.vcf.gz", indels, 1.43e6, 1.43e6, pop, "pilot3", "/humgen/gsa-hpprojects/1kg/1kg_pilot3/useTheseBamsForAnalysis/pilot3.%s.cleaned.bam".format(pop)) } // merged files @@ -57,7 +65,7 @@ val INTERVALS = Map( "pilot3" -> "/humgen/gsa-hpprojects/1kg/1kg_pilot3/documents/CenterSpecificTargetLists/results/p3overlap.targets.b36.interval_list" ) -def setupStage(stage: String) = stage match { +def script = stage match { case "ALL" => // initial pilot1 merge -- autosomes + x for ( (pop: String,called) <- p1Targets ) { @@ -106,36 +114,36 @@ def setupStage(stage: String) = stage match { case _ => throw new Exception("Unknown stage" + stage) } -setupStage(unusedArgs(0)) - -// Populate parameters passed in via -P -setParams - -// Run the pipeline -run - // Using scala anonymous classes -class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends GatkFunction { - @Input(doc="foo") var vcfFile: File = new File(vcfIn) - @Output(doc="foo") var evalFile: File = new File(evalOut) +class VariantEval(vcfIn: String, evalOut: String, vcfType: String = "VCF") extends org.broadinstitute.sting.queue.extensions.gatk.VariantEval with UNIVERSAL_GATK_ARGS { + val vcfFile = new File(vcfIn) + this.rodBind :+= RodBind("eval", vcfType, 
vcfFile) + this.out = new File(evalOut) + this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod") + this.reportType = Some(VE2TemplateType.Grep) + this.evalModule :+= "CompOverlap" + override def dotString = "VariantEval: " + vcfFile.getName - def commandLine = gatkCommandLine("VariantEval") + UNIVERSAL_GATK_ARGS + "-D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_b36.rod -reportType Grep -B eval,%s,%s -o %s -E CompOverlap".format(vcfType, vcfFile, evalFile) } class StatPop(target: Target) extends CommandLineFunction { @Input(doc="foo") var snpVCF = new File(target.getSNPVCF) @Input(doc="foo") var snpEval = new File(target.getSNPEval) - @Input(doc="foo") var indelVCF = if (target.hasIndelVCF) new File(target.getIndelVCF) else {} + @Input(doc="foo", required=false) var indelVCF: File = if (target.hasIndelVCF) new File(target.getIndelVCF) else { null } @Output(doc="foo") var reportFile: File = new File(target.reportFile) override def dotString = "1kgStats: " + reportFile def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/1kgStatsForCalls.py -v -a pilot_data.alignment.index -s pilot_data.sequence.index -r /broad/1KG/DCC/ftp/ -o " + target.reportFile + " " + target.extraArgs + (if (target.hasDOC) " -c " + target.getDOCSummaryFile else "") + " --snpsEval " + target.getSNPEval + (if (target.hasIndelVCF) " --indels " + target.getIndelVCF else "") } -class Combine(vcfsInArg: List[String], vcfOutPath: String) extends GatkFunction { - @Input(doc="foo") var vcfs = vcfsInArg.map((x: String) => new File(x)) - @Output(doc="foo") var vcfFile: File = new File(vcfOutPath) +class Combine(vcfsInArg: List[String], vcfOutPath: String) extends org.broadinstitute.sting.queue.extensions.gatk.CombineVariants with UNIVERSAL_GATK_ARGS { + val vcfs = vcfsInArg.map((x: String) => new File(x)) + val vcfFile = new File(vcfOutPath) + this.variantmergeoption = Some(VariantMergeType.UNION) + this.genotypemergeoption = Some(GenotypeMergeType.PRIORITIZE) + this.out = 
vcfFile + this.rodBind ++= vcfs.map( input => RodBind(input.getName,"VCF",input) ) + this.rod_priority_list = vcfs.map( _.getName ).mkString(",") override def dotString = "CombineVariants: " + vcfs.map(_.getName).mkString(",") + " => " + vcfFile.getName - def commandLine = gatkCommandLine("CombineVariants") + UNIVERSAL_GATK_ARGS + "-variantMergeOptions UNION -genotypeMergeOptions PRIORITIZE -o %s %s -priority %s".format(vcfFile, vcfs.map( input => " -B %s,VCF,%s".format(input.getName,input)).mkString(""), vcfs.map( _.getName ).mkString(",")) } class MaskStats(pop: String) extends CommandLineFunction { @@ -143,9 +151,19 @@ class MaskStats(pop: String) extends CommandLineFunction { def commandLine = "python ~/dev/GenomeAnalysisTK/trunk/python/maskStats.py masks/" + pop + ".mask.fa.gz -x MT -x Y -o " + outFile } -class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends GatkFunction { - @Input(doc="foo") var bamFile: File = new File(bam) - @Output(doc="foo") var docFile: File = new File(docOutPath) +class DepthOfCoverage(bam: String, docOutPath: String, interval: String) extends org.broadinstitute.sting.queue.extensions.gatk.DepthOfCoverage with UNIVERSAL_GATK_ARGS { + val bamFile = new File(bam) + this.omitIntervalStatistics = true + this.omitDepthOutputAtEachBase = true + this.minBaseQuality = Some(0) + this.minMappingQuality = Some(0) + this.out = new File(docOutPath) + this.input_file :+= bamFile + if (interval != null) { + this.intervalsString :+= interval + this.excludeIntervalsString ++= List("MT", "Y") + } + override def dotString = "DOC: " + bamFile.getName - def commandLine = gatkCommandLine("DepthOfCoverage") + UNIVERSAL_GATK_ARGS + "-omitIntervals -omitBaseOutput -mbq 0 -mmq 0 -o %s -I %s".format(docFile, bamFile) + (if (interval != null) " -XL MT -XL Y -L " + interval else "") +} } diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q index cf09621bb..0e390911c 100755 --- 
a/scala/qscript/fullCallingPipeline.q +++ b/scala/qscript/fullCallingPipeline.q @@ -1,240 +1,61 @@ -import org.broadinstitute.sting.queue.function.scattergather.{ContigScatterFunction, FixMatesGatherFunction} +import org.broadinstitute.sting.gatk.DownsampleType +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model +import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript -import org.broadinstitute.sting.queue.QScript._ -// Other imports can be added here -val unparsedArgs = setArgs(args) +class fullCallingPipeline extends QScript { + qscript => -// very slow-to-run fast-to-write parse args function. Only worth changing if using lots of flags with lots of lookups. + @Argument(doc="contigIntervals", shortName="contigIntervals") + var contigIntervals: File = _ -def parseArgs(flag: String): String = { - var retNext: Boolean = false - for ( f <- unparsedArgs ) { - if ( retNext ) { - return f - } else { - if ( f.equals(flag) ) { - retNext = true - } - } - } - return "None" -} + @Argument(doc="numContigs", shortName="numContigs") + var numContigs: Int = _ -///////////////////////////////////////////////// -// step one: we need to create a set of realigner targets, one for each bam file -///////////////////////////////////////////////// -// todo -- make me less of a hack that makes Khalid cry -abstract class GatkFunctionLocal extends GatkFunction { - if ( QScript.inputs("interval_list").size > 0 ) { - this.intervals = QScript.inputs("interval_list").head - } else { - this.intervals = QScript.inputs("interval.list").head - } -} + @Argument(doc="project", shortName="project") + var project: String = _ -class RealignerTargetCreator extends GatkFunctionLocal { - @Gather(classOf[SimpleTextGatherFunction]) - @Output(doc="Realigner targets") - var realignerIntervals: File = _ + @Input(doc="trigger", shortName="trigger", required=false) + var trigger: File = _ - def commandLine = 
gatkCommandLine("RealignerTargetCreator") + "-o %s".format(realignerIntervals) -} + @Input(doc="refseqTable", shortName="refseqTable") + var refseqTable: File = _ -///////////////////////////////////////////////// -// step two: we need to clean each bam file - gather will fix mates -///////////////////////////////////////////////// + @Input(doc="dbsnpTable", shortName="dbsnpTable") + var dbsnpTable: File = _ -class IndelRealigner extends GatkFunction { - @Input(doc="Intervals to clean") - var intervalsToClean: File = _ - @Scatter(classOf[ContigScatterFunction]) - @Input(doc="Contig intervals") - var contigIntervals: File = _ - @Gather(classOf[FixMatesGatherFunction]) - @Output(doc="Cleaned bam file") - var cleanedBam: File = _ + @Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. Outside the broad see http://picard.sourceforge.net/") + var picardFixMatesJar: File = _ - this.javaTmpDir = parseArgs("-tmpdir") // todo -- hack, move into script or something + @Input(doc="intervals") + var intervals: File = _ - override def freeze = { - this.intervals = contigIntervals - this.jobQueue = "long" - super.freeze - } + @Input(doc="bam files", shortName="I") + var bamFiles: List[File] = Nil - def commandLine = gatkCommandLine("IndelRealigner") + "--output %s -targetIntervals %s -L %s".format(cleanedBam,intervalsToClean,contigIntervals) -} + @Input(doc="gatk jar") + var gatkJar: File = _ -///////////////////////////////////////////////// -// step three: we need to call (multisample) over all bam files -///////////////////////////////////////////////// - -class UnifiedGenotyper extends GatkFunctionLocal { - @Input(doc="An optional trigger track (trigger emit will be set to 0)",required=false) - var trigger: File = _ - @Input(doc="A list of comparison files for annotation",required=false) - var compTracks: List[(String,File)] = Nil - @Input(doc="Calling confidence level (may change depending on 
depth and number of samples)") - var callConf: Int = _ - @Gather(classOf[SimpleTextGatherFunction]) - @Output(doc="raw vcf") - var rawVCF: File = _ - - // todo -- add input for comps, triggers, etc - - def commandLine = gatkCommandLine("UnifiedGenotyper") + "-G Standard -A MyHaplotypeScore -varout %s".format(rawVCF) + - " -stand_emit_conf 10 -mmq 20 -mbq 20 -dt EXPERIMENTAL_BY_SAMPLE -dcov 200" + - " -stand_call_conf %d".format(callConf) + - ( if (trigger == null ) "" else " -trig_call_conf %d -trig_emit_conf 0 -B trigger,VCF,%s".format(callConf,trigger) ) + - makeCompString - - def makeCompString = { - var S: String = "" - for ( tup <- compTracks ) { - S += " -B comp%s,VCF,%s".format(tup._1,tup._2) - } - S - } -} - -///////////////////////////////////////////////// -// step four: we need to call indels (multisample) over all bam files -///////////////////////////////////////////////// - -class UnifiedGenotyperIndels extends GatkFunctionLocal { - @Gather(classOf[SimpleTextGatherFunction]) - @Output(doc="indel vcf") - var indelVCF: File = _ - // todo -- add inputs for the indel genotyper - - def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s -gm INDELS".format(indelVCF) -} - -///////////////////////////////////////////////// -// step five: we need to filter variants on cluster and with indel mask -///////////////////////////////////////////////// -class VariantFiltration extends GatkFunctionLocal { - @Input(doc="A VCF file to filter") - var unfilteredVCF: File = _ - @Input(doc="An interval mask to use to filter indels") - var indelMask: File = _ - @Input(doc="Filter names",required=false) - var filterNames: List[String] = Nil - @Input(doc="Filter expressions",required=false) - var filterExpressions: List[String] = Nil - @Output(doc="The input VCF file, but filtered") - var filteredVCF: File = _ - // to do -- snp cluster args? 
- - def commandLine = gatkCommandLine("VariantFiltration") + "-B variant,VCF,%s -B mask,VCF,%s --maskName NearIndel --clusterWindowSize 20 --clusterSize 7 -o %s".format(unfilteredVCF,indelMask,filteredVCF) + - "%s%s".format(repeat(" -filterName ",filterNames), repeat(" -filterExpression ",filterExpressions)) -} - -///////////////////////////////////////////////// -// step six: we need to generate gaussian clusters with the optimizer -///////////////////////////////////////////////// -class GenerateVariantClusters extends GatkFunctionLocal { - @Input(doc="A VCF that has been filtered for clusters and indels") - var initialFilteredVCF: File = _ - @Output(doc="Variant cluster file generated from input VCF") - var clusterFile: File = _ - // todo -- args for annotations? - // todo -- args for resources (properties file) - - override def freeze = { - // todo -- hacky change in memory limit -- fix this when more official roads to do this are in place - this.memoryLimit = Some(8) - this.jobQueue = "hugemem" - super.freeze - } - - def commandLine = gatkCommandLine("GenerateVariantClusters") + "-an QD -an SB -an MyHaplotypeScore -an HRun " + - "-resources /humgen/gsa-scr1/chartl/sting/R -B input,VCF,%s -clusterFile %s".format(initialFilteredVCF,clusterFile) -} - -///////////////////////////////////////////////// -// step seven: we need to apply gaussian clusters to our variants -///////////////////////////////////////////////// -class ApplyGaussianClusters extends GatkFunctionLocal { - @Input(doc="A VCF file to which to apply clusters") - var inputVCF: File = _ - @Input(doc="A variant cluster file") - var clusterFile: File = _ - @Output(doc="A quality-score recalibrated VCF file") - var recalibratedVCF: File = _ - // todo -- inputs for Ti/Tv expectation and other things - - def commandLine = gatkCommandLine("VariantRecalibrator") + "--target_titv 2.1 -resources /humgen/gsa-scr1/chartl/sting/R " + - "-B input,VCF,%s -clusterFile %s -output 
%s".format(inputVCF,clusterFile,recalibratedVCF) -} - -///////////////////////////////////////////////// -// step eight: we need to make tranches out of the recalibrated qualities -///////////////////////////////////////////////// -class ApplyVariantCuts extends GatkFunctionLocal { - @Input(doc="A VCF file that has been recalibrated") - var recalibratedVCF: File = _ - @Output(doc="A VCF file that has had tranches marked") - var tranchedVCF: File = _ - @Output(doc="A tranch dat file") - var tranchFile: File = _ - // todo -- fdr inputs, etc - - def commandLine = gatkCommandLine("ApplyVariantCuts") + - "-B input,VCF,%s -outputVCF %s --tranchesFile %s --fdr_filter_level 10.0".format(recalibratedVCF,tranchedVCF,tranchFile) -} - -///////////////////////////////////////////////// -// step nine: we need to annotate variants using the annotator [or maf, for now] -///////////////////////////////////////////////// -class GenomicAnnotator extends GatkFunctionLocal { - @Input(doc="A VCF file to be annotated") - var inputVCF: File = _ - @Input(doc="Refseq input table to use with the annotator") - var refseqTable: File = _ - @Input(doc="Dbsnp input table to use with the annotator") - var dbsnpTable: File = _ - @Gather(classOf[SimpleTextGatherFunction]) - @Output(doc="A genomically annotated VCF file") - var annotatedVCF: File = _ - - def commandLine = gatkCommandLine("GenomicAnnotator") + " -B variant,VCF,%s -B refseq,AnnotatorInputTable,%s -B dbsnp,AnnotatorInputTable,%s -vcf %s -s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet -BTI variant".format(inputVCF,refseqTable,dbsnpTable,annotatedVCF) -} - -///////////////////////////////////////////////// -// step ten: we need to evaluate variants with variant eval -///////////////////////////////////////////////// -class VariantEval extends GatkFunctionLocal { - @Input(doc="An optimized vcf file to evaluate") - var optimizedVCF: File = _ - @Input(doc="A hand-fitlered vcf file to evaluate") - var handFilteredVCF: File 
= _ - @Output(doc="An evaluation file") - var evalOutput: File = _ - // todo -- make comp tracks command-line arguments or properties - - def commandLine = gatkCommandLine("VariantEval") + "-B evalOptimized,VCF,%s -B evalHandFiltered,VCF,%s -E CountFunctionalClasses -E CompOverlap -E CountVariants -E TiTvVariantEvaluator -o %s".format(optimizedVCF,handFilteredVCF,evalOutput) +trait CommandLineGATKArgs extends CommandLineGATK { + this.intervals = qscript.intervals + this.jarFile = qscript.gatkJar } // ------------ SETUP THE PIPELINE ----------- // // todo -- the unclean and clean pipelines are the same, so the code can be condensed significantly + def script = { + val projectBase: String = qscript.project + val cleanedBase: String = projectBase + ".cleaned" + val uncleanedBase: String = projectBase + ".uncleaned" // there are commands that use all the bam files + var cleanBamFiles = List.empty[File] -val cleanSNPCalls = new UnifiedGenotyper -val uncleanSNPCalls = new UnifiedGenotyper -val cleanIndelCalls = new UnifiedGenotyperIndels -val uncleanIndelCalls = new UnifiedGenotyperIndels - -for ( bam <- inputs("bam") ) { +for ( bam <- bamFiles ) { // put unclean bams in unclean genotypers - uncleanSNPCalls.bamFiles :+= bam - uncleanIndelCalls.bamFiles :+= bam - // in advance, create the extension files val indel_targets = swapExt(bam,"bam","realigner_targets.interval_list") @@ -242,86 +63,129 @@ for ( bam <- inputs("bam") ) { // create the cleaning commands - val targetCreator = new RealignerTargetCreator - targetCreator.bamFiles :+= bam - targetCreator.realignerIntervals = indel_targets + val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs + targetCreator.input_file :+= bam + targetCreator.out = indel_targets - val realigner = new IndelRealigner - realigner.bamFiles = targetCreator.bamFiles - realigner.contigIntervals = new File(parseArgs("-contigIntervals")) - realigner.intervalsToClean = targetCreator.realignerIntervals - realigner.scatterCount = 
parseArgs("-numContigs").toInt - realigner.cleanedBam = cleaned_bam + val realigner = new IndelRealigner with CommandLineGATKArgs + realigner.input_file = targetCreator.input_file + realigner.intervals = qscript.contigIntervals + //realigner.targetIntervals = targetCreator.out + realigner.targetIntervals = targetCreator.out.getAbsolutePath + realigner.scatterCount = qscript.numContigs + realigner.out = cleaned_bam + realigner.scatterClass = classOf[ContigScatterFunction] + realigner.setupGatherFunction = { case (f: BamGatherFunction, _) => f.jarFile = qscript.picardFixMatesJar } + realigner.jobQueue = "long" // put clean bams in clean genotypers - cleanSNPCalls.bamFiles :+= realigner.cleanedBam - cleanIndelCalls.bamFiles :+= realigner.cleanedBam + cleanBamFiles :+= realigner.out add(targetCreator,realigner) } + endToEnd(uncleanedBase,bamFiles) + endToEnd(cleanedBase,cleanBamFiles) + } -val projectBase: String = parseArgs("-project") -val cleanedBase: String = projectBase + ".cleaned" -val uncleanedBase: String = projectBase + ".uncleaned" - -def endToEnd(base: String, snps: UnifiedGenotyper, indels: UnifiedGenotyperIndels) = { +def endToEnd(base: String, bamFiles: List[File]) = { // step through the un-indel-cleaned graph: // 1a. 
call snps and indels - snps.rawVCF = new File(base+".vcf") - snps.callConf = 30 - snps.trigger = new File(parseArgs("-trigger")) + val snps = new UnifiedGenotyper with CommandLineGATKArgs + snps.input_file = bamFiles + snps.group :+= "Standard" + snps.annotation :+= "MyHaplotypeScore" + snps.variants_out = new File(base+".vcf") + snps.standard_min_confidence_threshold_for_emitting = Some(10) + snps.min_mapping_quality_score = Some(20) + snps.min_base_quality_score = Some(20) + snps.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE) + snps.downsample_to_coverage = Some(200) + // todo -- add input for comps, triggers, etc + if (qscript.trigger != null) { + snps.trigger_min_confidence_threshold_for_calling = Some(30) + snps.rodBind :+= RodBind("trigger", "VCF", qscript.trigger) + } // todo -- hack -- get this from the command line, or properties - snps.compTracks :+= ( "comp1KG_CEU",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") ) - snps.compTracks :+= ( "comp1KG_ALL",new File(parseArgs("-trigger") ) ) + snps.rodBind :+= RodBind( "comp1KG_CEU", "VCF", new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/CEU.low_coverage.2010_07.sites.hg18.vcf.gz") ) + + + // TODO: what is the 1KG_ALL track? + //snps.rodBind :+= RodBind( "comp1KG_ALL", "VCF", qscript.trigger ) + + snps.scatterCount = 100 - indels.indelVCF = new File(base+".indels.vcf") + val indels = new UnifiedGenotyper with CommandLineGATKArgs + indels.input_file = bamFiles + indels.variants_out = new File(base+".indels.vcf") + indels.genotype_model = Some(Model.INDELS) indels.scatterCount = 100 + // todo -- add inputs for the indel genotyper // 1b.
genomically annotate SNPs -- slow, but scatter it - val annotated = new GenomicAnnotator - annotated.inputVCF = snps.rawVCF - annotated.refseqTable = new File(parseArgs("-refseqTable")) - annotated.dbsnpTable = new File(parseArgs("-dbsnpTable")) - annotated.annotatedVCF = swapExt(snps.rawVCF,".vcf",".annotated.vcf") + val annotated = new GenomicAnnotator with CommandLineGATKArgs + annotated.rodBind :+= RodBind("variant", "VCF", snps.variants_out) + annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable) + annotated.rodBind :+= RodBind("dbsnp", "AnnotatorInputTable", qscript.dbsnpTable) + annotated.vcfOutput = swapExt(snps.variants_out,".vcf",".annotated.vcf") + annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet" + annotated.rodToIntervalTrackName = "variant" annotated.scatterCount = 100 // 2.a filter on cluster and near indels - val masker = new VariantFiltration - masker.unfilteredVCF = annotated.annotatedVCF - masker.indelMask = indels.indelVCF - masker.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".indel.masked.vcf") + val masker = new VariantFiltration with CommandLineGATKArgs + masker.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput) + masker.rodBind :+= RodBind("mask", "VCF", indels.variants_out) + masker.maskName = "NearIndel" + masker.clusterWindowSize = Some(20) + masker.clusterSize = Some(7) + masker.out = swapExt(annotated.vcfOutput,".vcf",".indel.masked.vcf") + // todo -- snp cluster args? 
// 2.b hand filter with standard filter - val handFilter = new VariantFiltration - handFilter.unfilteredVCF = annotated.annotatedVCF - handFilter.indelMask = indels.indelVCF - handFilter.filterNames = List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun") - handFilter.filterExpressions = List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"") - handFilter.filteredVCF = swapExt(annotated.annotatedVCF,".vcf",".handfiltered.vcf") + val handFilter = new VariantFiltration with CommandLineGATKArgs + handFilter.rodBind :+= RodBind("variant", "VCF", annotated.vcfOutput) + handFilter.rodBind :+= RodBind("mask", "VCF", indels.variants_out) + handFilter.filterName ++= List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun") + handFilter.filterExpression ++= List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"") + handFilter.out = swapExt(annotated.vcfOutput,".vcf",".handfiltered.vcf") // 3.i generate gaussian clusters on the masked vcf - val clusters = new GenerateVariantClusters - clusters.initialFilteredVCF = masker.filteredVCF - clusters.clusterFile = swapExt(snps.rawVCF,".vcf",".cluster") + val clusters = new GenerateVariantClusters with CommandLineGATKArgs + clusters.rodBind :+= RodBind("input", "VCF", masker.out) + //clusters.clusterFile = swapExt(snps.variants_out,".vcf",".cluster") + val clusters_clusterFile = swapExt(snps.variants_out,".vcf",".cluster") + clusters.clusterFile = clusters_clusterFile.getAbsolutePath + clusters.memoryLimit = Some(8) + clusters.jobQueue = "hugemem" + // todo -- args for annotations? 
+ // todo -- args for resources (properties file) + clusters.use_annotation ++= List("QD", "SB", "MyHaplotypeScore", "HRun") + clusters.path_to_resources = "/humgen/gsa-scr1/chartl/sting/R" // 3.ii apply gaussian clusters to the masked vcf - val recalibrate = new ApplyGaussianClusters + val recalibrate = new VariantRecalibrator with CommandLineGATKArgs recalibrate.clusterFile = clusters.clusterFile - recalibrate.inputVCF = masker.filteredVCF - recalibrate.recalibratedVCF = swapExt(masker.filteredVCF,".vcf",".optimized.vcf") + recalibrate.rodBind :+= RodBind("input", "VCF", masker.out) + recalibrate.out = swapExt(masker.out,".vcf",".optimized.vcf") + // todo -- inputs for Ti/Tv expectation and other things + recalibrate.target_titv = Some(2.1) // 3.iii apply variant cuts to the clusters - val cut = new ApplyVariantCuts - cut.recalibratedVCF = recalibrate.recalibratedVCF - cut.tranchedVCF = swapExt(recalibrate.recalibratedVCF,".vcf",".tranched.vcf") - cut.tranchFile = swapExt(recalibrate.recalibratedVCF,".vcf",".tranch") + val cut = new ApplyVariantCuts with CommandLineGATKArgs + cut.rodBind :+= RodBind("input", "VCF", recalibrate.out) + //cut.outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf") + //cut.tranchesFile = swapExt(recalibrate.out,".vcf",".tranch") + val cut_outputVCFFile = swapExt(recalibrate.out,".vcf",".tranched.vcf") + val cut_tranchesFile = swapExt(recalibrate.out,".vcf",".tranch") + cut.outputVCFFile = cut_outputVCFFile.getAbsolutePath + cut.tranchesFile = cut_tranchesFile.getAbsolutePath + // todo -- fdr inputs, etc + cut.fdr_filter_level = Some(10) // 4. 
Variant eval the cut and the hand-filtered vcf files - val eval = new VariantEval - eval.optimizedVCF = cut.tranchedVCF - eval.handFilteredVCF = handFilter.filteredVCF - eval.evalOutput = new File(base+".eval") + val eval = new VariantEval with CommandLineGATKArgs + eval.rodBind :+= RodBind("evalOptimized", "VCF", cut_outputVCFFile) + eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out) + // todo -- make comp tracks command-line arguments or properties + eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator") + eval.out = new File(base+".eval") add(snps,indels,annotated,masker,handFilter,clusters,recalibrate,cut,eval) } -endToEnd(uncleanedBase,uncleanSNPCalls,uncleanIndelCalls) -endToEnd(cleanedBase,cleanSNPCalls,cleanIndelCalls) - -setParams -run +} diff --git a/scala/qscript/recalibrate.scala b/scala/qscript/recalibrate.scala index 539fc27ef..df4cb5f57 100755 --- a/scala/qscript/recalibrate.scala +++ b/scala/qscript/recalibrate.scala @@ -1,73 +1,77 @@ -import java.io.File -import org.broadinstitute.sting.queue.QScript._ +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript import org.apache.commons.io.FilenameUtils; -// Other imports can be added here -val unusedArgs = setArgs(args) +class recalibrate extends QScript { + @Input(doc="bamIn", shortName="I") + var bamIns: List[File] = Nil + + @Argument(doc="scatter") + var scatter = false -def runPipeline(arg: String) = { - val scatter = arg == "scatter" + @Argument(doc="gatk jar file") + var gatkJarFile: File = _ - for (bamIn <- inputs(".bam")) { +def script = { + for (bamIn <- bamIns) { val root = bamIn.getPath() val bamRoot = FilenameUtils.removeExtension(root); val recalData = new File(bamRoot + ".recal_data.csv") val recalBam = new File(bamRoot + ".recal.bam") val recalRecalData = new File(bamRoot + ".recal.recal_data.csv") //add(new CountCovariates(root, recalData, "-OQ")) - val tableRecal = new 
TableRecalibrate(bamIn, recalData, recalBam, "-OQ") + val tableRecal = new TableRecalibrate(bamIn, recalData, recalBam) { useOriginalQualities = true } if ( scatter ) { tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list") tableRecal.scatterCount = 25 } add(tableRecal) add(new Index(recalBam)) - add(new CountCovariates(recalBam, recalRecalData, "-nt 4")) + add(new CountCovariates(recalBam, recalRecalData) { num_threads = Some(4) }) add(new AnalyzeCovariates(recalData, new File(recalData.getPath() + ".analyzeCovariates"))) add(new AnalyzeCovariates(recalRecalData, new File(recalRecalData.getPath() + ".analyzeCovariates"))) } } -runPipeline(unusedArgs(0)) - -// Populate parameters passed in via -P -setParams - -// Run the pipeline -run - def bai(bam: File) = new File(bam + ".bai") -class Index(bamIn: File) extends GatkFunction { - @Input(doc="foo") var bam = bamIn - @Output(doc="foo") var bamIndex = bai(bamIn) - memoryLimit = Some(1) - override def dotString = "Index: %s".format(bamIn.getName) - def commandLine = "samtools index %s".format(bam) +class Index(bamIn: File) extends BamIndexFunction { + bamFile = bamIn } -class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction { - @Input(doc="foo") var bam = bamIn - @Input(doc="foo") var bamIndex = bai(bamIn) - @Output(doc="foo") var recalData = recalDataIn - memoryLimit = Some(4) - override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, args) - def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData) +class CountCovariates(bamIn: File, recalDataIn: File) extends org.broadinstitute.sting.queue.extensions.gatk.CountCovariates { + this.jarFile = gatkJarFile + this.input_file :+= bamIn + 
this.recal_file = recalDataIn + this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod") + this.logging_level = "INFO" + this.max_reads_at_locus = Some(20000) + this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.memoryLimit = Some(4) + + override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, if (this.num_threads.isDefined) "-nt " + this.num_threads else "") } -class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args: String = "") extends GatkFunction { - @Input(doc="foo") var bamIn = bamInArg - @Input(doc="foo") var recalData = recalDataIn - @Gather(classOf[BamGatherFunction]) - @Output(doc="foo") var bamOut = bamOutArg - override def dotString = "TableRecalibrate: %s => %s [args %s]".format(bamInArg.getName, bamOutArg.getName, args) - memoryLimit = Some(2) - def commandLine = gatkCommandLine("TableRecalibration") + args + " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut) // bamOut.getPath()) +class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) extends org.broadinstitute.sting.queue.extensions.gatk.TableRecalibration { + this.jarFile = gatkJarFile + this.input_file :+= bamInArg + this.recal_file = recalDataIn + this.output_bam = bamOutArg + this.logging_level = "INFO" + this.memoryLimit = Some(2) + + override def dotString = "TableRecalibrate: %s => %s".format(bamInArg.getName, bamOutArg.getName, if (this.useOriginalQualities) " -OQ" else "") } -class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction { - @Input(doc="foo") var recalData = recalDataIn - memoryLimit = Some(4) +class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends org.broadinstitute.sting.queue.extensions.gatk.AnalyzeCovariates { + this.jarFile = new File("/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar") + this.recal_file = recalDataIn + this.output_dir = 
outputDir.toString + this.path_to_resources = "/home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/" + this.ignoreQ = Some(5) + this.path_to_Rscript = "/broad/tools/apps/R-2.6.0/bin/Rscript" + this.memoryLimit = Some(4) + override def dotString = "AnalyzeCovariates: %s".format(recalDataIn.getName) - def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript".format(recalData, outputDir) +} } diff --git a/scala/qscript/rpoplin/variantRecalibrator.scala b/scala/qscript/rpoplin/variantRecalibrator.scala index 09e1e34f4..21a267465 100755 --- a/scala/qscript/rpoplin/variantRecalibrator.scala +++ b/scala/qscript/rpoplin/variantRecalibrator.scala @@ -1,7 +1,11 @@ -import org.broadinstitute.sting.queue.QScript._ -// Other imports can be added here +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript -setArgs(args) +class variantRecalibrator extends QScript { + @Argument(doc="gatkJarFile") + var gatkJarFile: File = _ + + def script = { val gList = List(30) val sList = List(0.0001, 0.01) @@ -13,66 +17,40 @@ for (g: Int <- gList) { for (d: Double <- dList) { for(b: Double <- bList) { - // Using classes defined below + // Using classes defined by QueueGATKExtensions.jar val gvc = new GenerateVariantClusters val vr = new VariantRecalibrator - gvc.maxGaussians = g - gvc.shrinkage = s - gvc.dirichlet = d - gvc.clusterFile = new File("g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b)) - gvc.jobOutputFile = swapExt(gvc.clusterFile, ".cluster", ".gvc.out") + gvc.jarFile = gatkJarFile + gvc.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf")) + gvc.logging_level = "INFO" + gvc.intervalsString :+= "20" + gvc.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun") + gvc.path_to_resources 
= "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/" + gvc.maxGaussians = Some(g) + gvc.shrinkage = Some(s) + gvc.shrinkageFormat = "%.6f" + gvc.dirichlet = Some(d) + gvc.dirichletFormat = "%.6f" + gvc.clusterFile = "g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b) + gvc.jobOutputFile = new File(gvc.clusterFile.stripSuffix(".cluster") + ".gvc.out") + vr.jarFile = gatkJarFile + vr.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf")) + vr.logging_level = "INFO" + vr.intervalsString :+= "20" + vr.target_titv = Some(2.1) + vr.ignore_filter :+= "HARD_TO_VALIDATE" + vr.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/" vr.clusterFile = gvc.clusterFile - vr.jobOutputFile = swapExt(vr.clusterFile, ".cluster", ".vr.out") - vr.backOff = b + vr.jobOutputFile = new File(vr.clusterFile.stripSuffix(".cluster") + ".vr.out") + vr.backOff = Some(b) + vr.backOffFormat = "%.2f" add(gvc, vr) } } } } - -// Populate parameters passed in via -P -setParams - -// Run the pipeline -run - - - -// A very basic GATK UnifiedGenotyper -class GenerateVariantClusters extends GatkFunction { - var maxGaussians: Int = _ - var shrinkage: Double = _ - var dirichlet: Double = _ - - @Output - var clusterFile: File = _ - - def commandLine = gatkCommandLine("GenerateVariantClusters") + - "-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " + - "-l INFO -L 20 -an QD -an SB -an HaplotypeScore -an HRun " + - "-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " + - "-mG %d ".format(maxGaussians) + - "-shrinkage %.6f ".format(shrinkage) + - "-dirichlet %.6f ".format(dirichlet) + - "-clusterFile %s".format(clusterFile) -} - -// A basic GATK VariantFiltration -class VariantRecalibrator extends GatkFunction { - var backOff: Double = _ - - @Input - var clusterFile: File = _ - - def commandLine = gatkCommandLine("VariantRecalibrator") + - "-B input20,VCF,/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf " + - "-l INFO -L 20 -titv 2.1 " + - "--ignore_filter 
HARD_TO_VALIDATE " + - "-resources /humgen/gsa-scr1/rpoplin/sting_dev_vb/R/ " + - "-backOff %.2f ".format(backOff) + - "-clusterFile %s ".format(clusterFile) + - "-output %s".format(clusterFile) + } } diff --git a/scala/qscript/unifiedgenotyper_example.properties b/scala/qscript/unifiedgenotyper_example.properties deleted file mode 100644 index 4c4668db1..000000000 --- a/scala/qscript/unifiedgenotyper_example.properties +++ /dev/null @@ -1,7 +0,0 @@ -gatkJar = /humgen/gsa-hpprojects/GATK/bin/current/GenomeAnalysisTK.jar -referenceFile = /path/to/reference.fasta -dbsnp = /path/to/dbsnp -intervals = /path/to/my.interval_list -jobNamePrefix = Q -memoryLimit = 2 -gatkLoggingLevel = INFO diff --git a/scala/qscript/unifiedgenotyper_example.scala b/scala/qscript/unifiedgenotyper_example.scala deleted file mode 100644 index d21a1ef6c..000000000 --- a/scala/qscript/unifiedgenotyper_example.scala +++ /dev/null @@ -1,54 +0,0 @@ -import org.broadinstitute.sting.queue.QScript._ - -setArgs(args) - -for (bam <- inputs("bam")) { - val ug = new UnifiedGenotyper - val vf = new VariantFiltration - val ve = new GatkFunction { - @Input(doc="vcf") var vcfFile: File = _ - @Output(doc="eval") var evalFile: File = _ - def commandLine = gatkCommandLine("VariantEval") + "-B eval,VCF,%s -o %s".format(vcfFile, evalFile) - } - - // Make sure the Sting/shell folder is in your path to use mergeText.sh and splitIntervals.sh. 
- ug.scatterCount = 3 - ug.bamFiles :+= bam - ug.vcfFile = swapExt(bam, "bam", "unfiltered.vcf") - - vf.vcfInput = ug.vcfFile - vf.vcfOutput = swapExt(bam, "bam", "filtered.vcf") - - ve.vcfFile = vf.vcfOutput - ve.evalFile = swapExt(bam, "bam", "eval") - - add(ug, vf, ve) -} - -setParams -run - - -class UnifiedGenotyper extends GatkFunction { - @Output(doc="vcf") - @Gather(classOf[SimpleTextGatherFunction]) - var vcfFile: File = _ - def commandLine = gatkCommandLine("UnifiedGenotyper") + "-varout %s".format(vcfFile) -} - -class VariantFiltration extends GatkFunction { - @Input(doc="input vcf") - var vcfInput: File = _ - - @Input(doc="filter names") - var filterNames: List[String] = Nil - - @Input(doc="filter expressions") - var filterExpressions: List[String] = Nil - - @Output(doc="output vcf") - var vcfOutput: File = _ - - def commandLine = gatkCommandLine("VariantFiltration") + "%s%s -B variant,VCF,%s -o %s" - .format(repeat(" -filterName ", filterNames), repeat(" -filterExpression ", filterExpressions), vcfInput, vcfOutput) -} diff --git a/scala/src/org/broadinstitute/sting/queue/QArguments.scala b/scala/src/org/broadinstitute/sting/queue/QArguments.scala deleted file mode 100755 index 5c921231b..000000000 --- a/scala/src/org/broadinstitute/sting/queue/QArguments.scala +++ /dev/null @@ -1,105 +0,0 @@ -package org.broadinstitute.sting.queue - -import collection.mutable.ListBuffer -import collection.JavaConversions._ -import org.broadinstitute.sting.queue.util.Logging -import org.broadinstitute.sting.utils.text.XReadLines -import java.io.{FileInputStream, File} -import java.util.Properties - -class QArguments(args: Array[String]) { - var bsubAllJobs = false - var bsubWaitJobs = false - var dryRun = false - val scripts = new ListBuffer[String] - var inputPaths = List.empty[File] - var properties = Map.empty[String, String] - - val userArgs = parseArgs(args) - - private def parseArgs(args: Array[String]) = { - var filtered = new ListBuffer[String] - 
filtered.appendAll(args) - - if (isFlagged(filtered, "-debug")) - Logging.setDebug - if (isFlagged(filtered, "-trace")) - Logging.setTrace - if (isFlagged(filtered, "-dry")) - dryRun = true - if (isFlagged(filtered, "-bsub")) - bsubAllJobs = true - if (isFlagged(filtered, "-bsubWait")) - bsubWaitJobs = true - for (arg <- getArgs(filtered, "-P")) - addProperties(arg) - for (arg <- getArgs(filtered, "-I")) - addFile(arg) - for (arg <- getArgs(filtered, "-S")) - scripts.append(arg) - - List(filtered:_*) - } - - private def isFlagged(filtered: ListBuffer[String], search: String) = { - var found = false - var index = 0 - while (0 <= index && index < filtered.size) { - index = filtered.indexOf(search) - if (index >= 0) { - found = true - filtered.remove(index) - } - } - found - } - - private def getArgs(filtered: ListBuffer[String], search: String) = { - var found = new ListBuffer[String] - var index = 0 - while (0 <= index && index < filtered.size) { - index = filtered.indexOf(search) - if (index >= 0) { - found.append(filtered(index+1)) - filtered.remove(index, 2) - } - } - found - } - - def addProperties(arg: String) = { - var file = new File(arg) - if (arg.contains("=") && !file.exists) { - val tokens = arg.split("=", 2) - properties += tokens(0) -> tokens(1) - } else if (arg.endsWith(".properties")) { - if (!file.exists) - throw new QException("File not found: " + file.getAbsolutePath) - var props = new Properties - props.load(new FileInputStream(file)) - for ((name, value) <- props) - properties += name -> value - } else { - throw new QException("Invalid property: " + arg) - } - } - - def addFile(arg: String): Unit = { - var file = new File(arg) - inputPaths :+= file - if (arg.endsWith(".list")) - new XReadLines(file).iterator.foreach(addFile(_)) - } -} - -object QArguments { - def strip(filtered: ListBuffer[String], search: String) = { - var index = 0 - while (0 <= index && index < filtered.size) { - index = filtered.indexOf(search) - if (index >= 0) { - 
filtered.remove(index, 2) - } - } - } -} diff --git a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index f59ea960b..1e4a05cad 100755 --- a/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -1,47 +1,115 @@ package org.broadinstitute.sting.queue -import tools.nsc.MainGenericRunner -import org.broadinstitute.sting.queue.util.ClasspathUtils -import collection.mutable.ListBuffer -import org.broadinstitute.sting.queue.util.Logging +import java.io.File +import java.util.Arrays +import org.broadinstitute.sting.queue.engine.QGraph +import org.broadinstitute.sting.commandline.{ClassType, Input, Argument, CommandLineProgram} +import org.broadinstitute.sting.queue.util.{Logging, ScalaCompoundArgumentTypeDescriptor} -object QCommandLine extends Application with Logging { - var usage = """usage: java -jar Queue.jar [-P name=value] [-P file.properties] [-I input.file] [-I input_files.list] [-bsub] [-bsubWait] [-dry] [-debug] -S pipeline.scala""" +/** + * Entry point of Queue. Compiles and runs QScripts passed in to the command line. 
+ */ +class QCommandLine extends CommandLineProgram with Logging { + @Input(fullName="script", shortName="S", doc="QScript scala file", required=true) + @ClassType(classOf[File]) + private var scripts = List.empty[File] - override def main(args: Array[String]) = { - val qArgs: QArguments = try { - new QArguments(args) - } catch { - case exception => { - println(exception) - println(usage) - System.exit(-1) - } - null + @Argument(fullName="bsub_all_jobs", shortName="bsub", doc="Use bsub to submit jobs", required=false) + private var bsubAllJobs = false + + @Argument(fullName="bsub_wait_jobs", shortName="bsubWait", doc="Wait for bsub submitted jobs before exiting", required=false) + private var bsubWaitJobs = false + + @Argument(fullName="run_scripts", shortName="run", doc="Run QScripts", required=false) + private var run = false + + @Argument(fullName="dot_graph", shortName="dot", doc="Outputs the queue graph to a .dot file. See: http://en.wikipedia.org/wiki/DOT_language", required=false) + private var queueDot: File = _ + + /** + * Takes the QScripts passed in, runs their script() methods, retrieves their generated + * functions, and then builds and runs a QGraph based on the dependencies. 
+ */ + def execute = { + val qGraph = new QGraph + qGraph.dryRun = !run + qGraph.bsubAllJobs = bsubAllJobs + qGraph.bsubWaitJobs = bsubWaitJobs + + val scripts = qScriptManager.createScripts() + for (script <- scripts) { + logger.info("Scripting " + qScriptManager.getName(script.getClass.asSubclass(classOf[QScript]))) + loadArgumentsIntoObject(script) + script.script + script.functions.foreach(qGraph.add(_)) + logger.info("Added " + script.functions.size + " functions") } - logger.debug("starting") - - if (qArgs.scripts.size == 0) { - println("Error: Missing script") - println(usage) - System.exit(-1) + logger.info("Binding functions") + qGraph.fillIn + if (queueDot != null) { + logger.info("Generating " + queueDot) + qGraph.renderToDot(queueDot) } - // NOTE: Something in MainGenericRunner is exiting the VM. - if (qArgs.scripts.size != 1) { - println("Error: Only one script can be run at a time") - println(usage) - System.exit(-1) - } + logger.info("Running generated graph") + qGraph.run + logger.info("Done") + 0 + } - val newArgs = new ListBuffer[String] - newArgs.appendAll(args) - QArguments.strip(newArgs, "-S") - newArgs.prepend("-nocompdaemon", "-classpath", ClasspathUtils.manifestAwareClassPath, qArgs.scripts.head) - MainGenericRunner.main(newArgs.toArray) + /** + * Returns true as QScripts are located and compiled. + * @return true + */ + override def canAddArgumentsDynamically = true - // NOTE: This line is not reached because the MainGenericRunner exits the VM. - logger.debug("exiting") + /** + * Returns the list of QScripts passed in via -S so that their + * arguments can be inspected before QScript.script is called. + * @return Array of QScripts passed in. 
+ */ + override def getArgumentSources = + qScriptManager.getValues.asInstanceOf[Array[Class[_]]] + + /** + * Returns the name of a QScript + * @return The name of a QScript + */ + override def getArgumentSourceName(source: Class[_]) = + qScriptManager.getName(source.asSubclass(classOf[QScript])) + + /** + * Returns a ScalaCompoundArgumentTypeDescriptor that can parse argument sources into scala collections. + * @return a ScalaCompoundArgumentTypeDescriptor + */ + override def getArgumentTypeDescriptors = + Arrays.asList(new ScalaCompoundArgumentTypeDescriptor) + + /** + * Loads the QScripts passed in and returns a new QScriptManager than can be used to create them. + */ + private lazy val qScriptManager = { + QScriptManager.loadScripts(scripts) + new QScriptManager + } +} + +/** + * Entry point of Queue. Compiles and runs QScripts passed in to the command line. + */ +object QCommandLine { + /** + * Main. + * @param argv Arguments. + */ + def main(argv: Array[String]) { + try { + CommandLineProgram.start(new QCommandLine, argv); + if (CommandLineProgram.result != 0) + System.exit(CommandLineProgram.result); + } catch { + case e: Exception => CommandLineProgram.exitSystemWithError(e) + } } } diff --git a/scala/src/org/broadinstitute/sting/queue/QScript.scala b/scala/src/org/broadinstitute/sting/queue/QScript.scala index 7fa24e9ee..a795f664c 100755 --- a/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -1,109 +1,41 @@ package org.broadinstitute.sting.queue -import org.broadinstitute.sting.queue.function.CommandLineFunction -import org.broadinstitute.sting.queue.engine.QGraph +import org.broadinstitute.sting.queue.util.Logging /** - * Syntactic sugar for filling in a pipeline using a Scala script. + * Defines a Queue pipeline as a collection of CommandLineFunctions. 
*/ -object QScript { +trait QScript extends Logging { // Type aliases so users don't have to import type File = java.io.File type Input = org.broadinstitute.sting.commandline.Input type Output = org.broadinstitute.sting.commandline.Output + type Argument = org.broadinstitute.sting.commandline.Argument + type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection type CommandLineFunction = org.broadinstitute.sting.queue.function.CommandLineFunction - type GatkFunction = org.broadinstitute.sting.queue.function.gatk.GatkFunction type ScatterGatherableFunction = org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction type Scatter = org.broadinstitute.sting.queue.function.scattergather.Scatter type Gather = org.broadinstitute.sting.queue.function.scattergather.Gather - type BamGatherFunction = org.broadinstitute.sting.queue.function.scattergather.BamGatherFunction type SimpleTextGatherFunction = org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction - // The arguments for executing pipelines - private var qArgs: QArguments = _ - - // A default pipeline. Can also use multiple 'new Pipeline()' - private val pipeline = new Pipeline + /** + * Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method. + */ + def script: Unit /** - * Initializes the QArguments and returns a list of the rest of the user args. + * The command line functions that will be executed for this QScript. */ - def setArgs(params: Array[String]) = { - qArgs = new QArguments(params) - qArgs.userArgs - } - - /** - * Returns a list of files that were specified with "-I " on the command line - * or inside a .list file. - */ - def inputs(extension: String) = qArgs.inputPaths.filter(_.getName.endsWith(extension)) + var functions = List.empty[CommandLineFunction] /** * Exchanges the extension on a file. 
*/ - def swapExt(file: File, oldExtension: String, newExtension: String) = + protected def swapExt(file: File, oldExtension: String, newExtension: String) = new File(file.getName.stripSuffix(oldExtension) + newExtension) /** - * Adds one or more command line functions for dispatch later during run() + * Adds one or more command line functions to be run. */ - def add(functions: CommandLineFunction*) = pipeline.add(functions:_*) - - /** - * Sets the @Input and @Output values for all the functions - */ - def setParams(): Unit = pipeline.setParams() - - /** - * Sets the @Input and @Output values for a single function - */ - def setParams(function: CommandLineFunction): Unit = pipeline.setParams(function) - - /** - * Executes functions that have been added to the pipeline. - */ - def run() = pipeline.run() - - - /** - * Encapsulates a set of functions to run together. - */ - protected class Pipeline { - private var functions = List.empty[CommandLineFunction] - - /** - * Adds one or more command line functions for dispatch later during run() - */ - def add(functions: CommandLineFunction*) = - this.functions :::= List(functions:_*) - - /** - * Sets the @Input and @Output values for all the functions - */ - def setParams(): Unit = - for (function <- functions) setParams(function) - - /** - * Sets the @Input and @Output values for a single function - */ - def setParams(function: CommandLineFunction): Unit = - function.properties = qArgs.properties - - /** - * Executes functions that have been added to the pipeline. 
- */ - def run() = { - val qGraph = new QGraph - qGraph.dryRun = qArgs.dryRun - qGraph.bsubAllJobs = qArgs.bsubAllJobs - qGraph.bsubWaitJobs = qArgs.bsubWaitJobs - qGraph.properties = qArgs.properties - for (function <- functions) - qGraph.add(function) - qGraph.fillIn - qGraph.run - qGraph.renderToDot(new File("queue.dot")) - } - } + def add(functions: CommandLineFunction*) = this.functions ++= List(functions:_*) } diff --git a/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala new file mode 100644 index 000000000..1b8a00d91 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala @@ -0,0 +1,163 @@ +package org.broadinstitute.sting.queue + +import org.broadinstitute.sting.utils.classloader.PluginManager +import scala.tools.nsc.{Global, Settings} +import scala.tools.nsc.io.PlainFile +import org.broadinstitute.sting.queue.util.{Logging, ClasspathUtils, IOUtils} +import collection.JavaConversions +import java.io.File +import scala.tools.nsc.reporters.AbstractReporter +import java.lang.String +import org.apache.log4j.Level +import scala.tools.nsc.util.{FakePos, NoPosition, Position} + +/** + * Plugin manager for QScripts which loads QScripts into the current class loader. + */ +class QScriptManager extends PluginManager[QScript](classOf[QScript], "QScript", "Script") with Logging { + + /** + * Returns the list of QScripts classes found in the classpath. + * @return QScripts classes found in the classpath. + */ + def getValues = { + if (logger.isTraceEnabled) { + logger.trace(JavaConversions.asMap(this.pluginsByName) + .foreach{case (name, clazz) => "Found QScript %s: %s".format(name, clazz)}) + } + JavaConversions.asIterable(this.pluginsByName.values).toArray + } + + /** + * Creates the QScripts for all values found in the classpath. + * @return QScripts found in the classpath. 
+ */ + def createScripts() = getValues.map(_.newInstance.asInstanceOf[QScript]) +} + +/** + * Plugin manager for QScripts which loads QScripts into the current classloader. + */ +object QScriptManager extends Logging { + /** + * Compiles and loads the scripts in the files into the current classloader. + * Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala + * @param scripts Scala classes to compile. + */ + def loadScripts(scripts: List[File]) { + if (scripts.size > 0) { + + val settings = new Settings((error: String) => logger.error(error)) + val outdir = IOUtils.tempDir("Q-classes").getAbsoluteFile + settings.outdir.value = outdir.getPath + + // Set the classpath to the current class path. + ClasspathUtils.manifestAwareClassPath.foreach(path => settings.classpath.append(path.getPath)) + + val reporter = new Log4JReporter(settings) + + val compiler = new Global(settings, reporter) + val run = new compiler.Run + + logger.debug("Compiling %s QScript%s".format(scripts.size, plural(scripts.size))) + logger.trace("Compilation directory: " + settings.outdir.value) + run.compileFiles(scripts.map(new PlainFile(_))) + + reporter.printSummary() + if (reporter.hasErrors) { + val msg = "Compile failed with %d error%s".format( + reporter.ERROR.count, plural(reporter.ERROR.count)) + throw new QException(msg) + } + else if (reporter.WARNING.count > 0) + logger.warn("Compile succeeded with %d warning%s".format( + reporter.WARNING.count, plural(reporter.WARNING.count))) + else + logger.debug("Compilation complete") + + // Add the new compilation output directory to the classpath. + ClasspathUtils.addClasspath(outdir) + } + } + + /** + * Returns the string "s" if x is greater than 1. + * @param x Value to test. + * @return "s" if x is greater than one else "". + */ + private def plural(x: Int) = if (x > 1) "s" else "" + + /** + * NSC (New Scala Compiler) reporter which logs to Log4J. 
+ * Heavily based on scala/src/compiler/scala/tools/nsc/reporters/ConsoleReporter.scala + */ + private class Log4JReporter(val settings: Settings) extends AbstractReporter { + def displayPrompt = throw new UnsupportedOperationException("Unable to prompt the user. Prompting should be off.") + + /** + * Displays the message at position with severity. + * @param posIn Position of the event in the file that generated the message. + * @param msg Message to display. + * @param severity Severity of the event. + */ + def display(posIn: Position, msg: String, severity: Severity) = { + severity.count += 1 + val level = severity match { + case INFO => Level.INFO + case WARNING => Level.WARN + case ERROR => Level.ERROR + } + val pos = if (posIn eq null) NoPosition + else if (posIn.isDefined) posIn.inUltimateSource(posIn.source) + else posIn + pos match { + case FakePos(fmsg) => + printMessage(level, fmsg+" "+msg) + case NoPosition => + printMessage(level, msg) + case _ => + val buf = new StringBuilder(msg) + val file = pos.source.file + printMessage(level, file.name+":"+pos.line+": "+msg) + printSourceLine(level, pos) + } + } + + /** + * Prints a summary count of warnings and errors. + */ + def printSummary() = { + if (WARNING.count > 0) + printMessage(Level.WARN, countElementsAsString(WARNING.count, "warning") + " found") + if (ERROR.count > 0) + printMessage(Level.ERROR, countElementsAsString(ERROR.count, "error") + " found") + } + + /** + * Prints the source code line of an event followed by a pointer within the line to the error. + * @param level Severity level. + * @param pos Position in the file of the event. + */ + private def printSourceLine(level: Level, pos: Position) { + printMessage(level, pos.lineContent.stripLineEnd) + printColumnMarker(level, pos) + } + + /** + * Prints the column marker of the given position. + * @param level Severity level. + * @param pos Position in the file of the event. 
+ */ + private def printColumnMarker(level: Level, pos: Position) = + if (pos.isDefined) { printMessage(level, " " * (pos.column - 1) + "^") } + + /** + * Prints the message at the severity level. + * @param level Severity level. + * @param message Message content. + */ + private def printMessage(level: Level, message: String) = { + logger.log(level, message) + } + } +} diff --git a/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala deleted file mode 100755 index da23d3766..000000000 --- a/scala/src/org/broadinstitute/sting/queue/engine/CommandLineRunner.scala +++ /dev/null @@ -1,20 +0,0 @@ -package org.broadinstitute.sting.queue.engine - -import org.broadinstitute.sting.queue.util.{Logging, ProcessUtils} -import org.broadinstitute.sting.queue.function.CommandLineFunction - -/** - * Runs jobs one at a time locally - */ -trait CommandLineRunner extends Logging { - def run(function: CommandLineFunction, qGraph: QGraph) = { - if (logger.isDebugEnabled) { - logger.debug(function.commandDirectory + " > " + function.commandLine) - } else { - logger.info(function.commandLine) - } - - if (!qGraph.dryRun) - ProcessUtils.runCommandAndWait(function.commandLine, function.commandDirectory) - } -} diff --git a/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala index 88f48c1e5..d1d80d99b 100755 --- a/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/DispatchJobRunner.scala @@ -1,22 +1,38 @@ package org.broadinstitute.sting.queue.engine import collection.JavaConversions._ -import org.broadinstitute.sting.queue.function.{DispatchFunction, QFunction} +import org.broadinstitute.sting.queue.function.{CommandLineFunction, QFunction} import scala.collection.immutable.ListSet +/** + * Dispatches jobs to a compute cluster. 
+ */ trait DispatchJobRunner { + /** Type of the job. */ type DispatchJobType - private var dispatchJobs = Map.empty[DispatchFunction, DispatchJobType] + /** An internal cache of all the jobs that have run by command line function. */ + private var dispatchJobs = Map.empty[CommandLineFunction, DispatchJobType] + /** An internal list of functions that have no other dependencies. */ private var waitJobsByGraph = Map.empty[QGraph, ListSet[DispatchJobType]] /** * Dispatches a function to the queue and returns immediately, unless the function is a DispatchWaitFunction * in which case it waits for all other terminal functions to complete. + * @param function Command to run. + * @param qGraph graph that holds the job, and if this is a dry run. */ - def dispatch(function: DispatchFunction, qGraph: QGraph) + def dispatch(function: CommandLineFunction, qGraph: QGraph) - protected def addJob(function: DispatchFunction, qGraph: QGraph, - dispatchJob: DispatchJobType, previousJobs: List[DispatchJobType]) = { + /** + * Adds the job to the internal cache of previous jobs and removes the previous jobs that + * the job was dependent on from the list of function that have no dependencies. + * @param function CommandLineFunction to add to the list. + * @param qGraph Current qGraph being iterated over. + * @param dispatchJob The job that is being added to the cache. + * @param previousJobs The previous jobs that the job was dependent one. + */ + protected def addJob(function: CommandLineFunction, qGraph: QGraph, + dispatchJob: DispatchJobType, previousJobs: Iterable[DispatchJobType]) = { dispatchJobs += function -> dispatchJob var waitJobs = getWaitJobs(qGraph) for (previousJob <- previousJobs) @@ -26,7 +42,10 @@ trait DispatchJobRunner { } /** - * Walks up the graph looking for the previous LsfJobs + * Walks up the graph looking for the previous LsfJobs. + * @param function Function to examine for a previous command line job. + * @param qGraph The graph that contains the jobs. 
+ * @return A list of prior jobs. */ protected def previousJobs(function: QFunction, qGraph: QGraph) : List[DispatchJobType] = { var previous = List.empty[DispatchJobType] @@ -36,10 +55,10 @@ trait DispatchJobRunner { incomingEdge match { // Stop recursing when we find a job along the edge and return its job id - case dispatchFunction: DispatchFunction => previous :+= dispatchJobs(dispatchFunction) + case dispatchFunction: CommandLineFunction => previous :+= dispatchJobs(dispatchFunction) // For any other type of edge find the LSF jobs preceding the edge - case qFunction: QFunction => previous = previousJobs(qFunction, qGraph) ::: previous + case qFunction: QFunction => previous ++= previousJobs(qFunction, qGraph) } } previous @@ -47,10 +66,25 @@ trait DispatchJobRunner { /** * Returns a set of jobs that have no following jobs in the graph. + * @param qGraph The graph that contains the jobs. + * @return ListSet[DispatchJobType] of previous jobs that have no dependent jobs. */ protected def getWaitJobs(qGraph: QGraph) = { if (!waitJobsByGraph.contains(qGraph)) waitJobsByGraph += qGraph -> ListSet.empty[DispatchJobType] waitJobsByGraph(qGraph) } + + /** + * Builds a command line that can be run to force an automount of the directories. + * @param function Function to look jobDirectories. + * @return A "cd [&& cd ]" command. 
+ */ + protected def mountCommand(function: CommandLineFunction) = { + val dirs = function.jobDirectories + if (dirs.size > 0) + Some("\'" + dirs.mkString("cd ", " && cd ", "") + "\'") + else + None + } } diff --git a/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala index dc0780527..d49534a24 100644 --- a/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/LsfJobRunner.scala @@ -1,55 +1,76 @@ package org.broadinstitute.sting.queue.engine -import collection.JavaConversions._ -import edu.mit.broad.core.lsf.LocalLsfJob -import java.util.ArrayList -import org.broadinstitute.sting.queue.util.Logging -import org.broadinstitute.sting.queue.function.{DispatchWaitFunction, DispatchFunction} +import org.broadinstitute.sting.queue.function.{CommandLineFunction, DispatchWaitFunction} +import org.broadinstitute.sting.queue.util.{IOUtils, LsfJob, Logging} +/** + * Runs jobs on an LSF compute cluster. + */ trait LsfJobRunner extends DispatchJobRunner with Logging { - type DispatchJobType = LocalLsfJob + type DispatchJobType = LsfJob - def dispatch(function: DispatchFunction, qGraph: QGraph) = { - val job = new LocalLsfJob - job.setName(function.jobName) - job.setOutputFile(function.jobOutputFile) - job.setErrFile(function.jobErrorFile) - job.setWorkingDir(function.commandDirectory) - job.setProject(function.jobProject) - job.setQueue(function.jobQueue) - job.setCommand(function.commandLine) + /** + * Dispatches the function on the LSF cluster. + * @param function Command to run. + * @param qGraph graph that holds the job, and if this is a dry run. 
+ */ + def dispatch(function: CommandLineFunction, qGraph: QGraph) = { + val job = new LsfJob + job.name = function.jobName + job.outputFile = function.jobOutputFile + job.errorFile = function.jobErrorFile + job.project = function.jobProject + job.queue = function.jobQueue + job.command = function.commandLine - var extraArgs = List("-r") + if (!IOUtils.CURRENT_DIR.getCanonicalFile.equals(function.commandDirectory)) + job.workingDir = function.commandDirectory + + if (function.jobRestartable) + job.extraBsubArgs :+= "-r" if (function.memoryLimit.isDefined) - extraArgs :::= List("-R", "rusage[mem=" + function.memoryLimit.get + "]") + job.extraBsubArgs ++= List("-R", "rusage[mem=" + function.memoryLimit.get + "]") - val previous = + val previous: Iterable[LsfJob] = if (function.isInstanceOf[DispatchWaitFunction]) { - extraArgs :+= "-K" - getWaitJobs(qGraph).toList + job.waitForCompletion = true + getWaitJobs(qGraph) } else { previousJobs(function, qGraph) } - if (previous.size > 0) - extraArgs :::= List("-w", dependencyExpression(previous)) + mountCommand(function) match { + case Some(command) => job.preExecCommand = command + case None => /* ignore */ + } - job.setExtraBsubArgs(new ArrayList(extraArgs)) + if (previous.size > 0) + job.extraBsubArgs ++= List("-w", dependencyExpression(previous, function.jobRunOnlyIfPreviousSucceed)) addJob(function, qGraph, job, previous) if (logger.isDebugEnabled) { - logger.debug(function.commandDirectory + " > " + job.getBsubCommand.mkString(" ")) + logger.debug(function.commandDirectory + " > " + job.bsubCommand.mkString(" ")) } else { - logger.info(job.getBsubCommand.mkString(" ")) + logger.info(job.bsubCommand.mkString(" ")) } if (!qGraph.dryRun) - job.start + job.run } - private def dependencyExpression(jobs: List[LocalLsfJob]) = { - jobs.toSet[LocalLsfJob].map(_.getName).mkString("ended(\"", "\") && ended(\"", "\")") + /** + * Returns the dependency expression for the prior jobs. 
+ * @param jobs Previous jobs this job is dependent on. + * @param runOnSuccess Run the job only if the previous jobs succeed. + * @return The dependency expression for the prior jobs. + */ + private def dependencyExpression(jobs: Iterable[LsfJob], runOnSuccess: Boolean) = { + val jobNames = jobs.toSet[LsfJob].map(_.name) + if (runOnSuccess) + jobNames.mkString("done(\"", "\") && done(\"", "\")") + else + jobNames.mkString("ended(\"", "\") && ended(\"", "\")") } } diff --git a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index 2670e82b3..9e1d68d86 100755 --- a/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -6,22 +6,27 @@ import scala.collection.JavaConversions import scala.collection.JavaConversions._ import org.broadinstitute.sting.queue.function.{MappingFunction, CommandLineFunction, QFunction} import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction -import org.broadinstitute.sting.queue.util.{CollectionUtils, Logging} +import org.broadinstitute.sting.queue.util.Logging import org.broadinstitute.sting.queue.QException import org.jgrapht.alg.CycleDetector import org.jgrapht.EdgeFactory import org.jgrapht.ext.DOTExporter -import org.broadinstitute.sting.queue.function.DispatchFunction -import org.broadinstitute.sting.queue.function.gatk.GatkFunction +import java.io.File +/** + * The internal dependency tracker between sets of function input and output files. + */ class QGraph extends Logging { var dryRun = true var bsubAllJobs = false var bsubWaitJobs = false - var properties = Map.empty[String, String] val jobGraph = newGraph def numJobs = JavaConversions.asSet(jobGraph.edgeSet).filter(_.isInstanceOf[CommandLineFunction]).size + /** + * Adds a QScript created CommandLineFunction to the graph. + * @param command Function to add to the graph. 
+ */ def add(command: CommandLineFunction) { addFunction(command) } @@ -49,22 +54,30 @@ class QGraph extends Logging { jobGraph.removeAllVertices(jobGraph.vertexSet.filter(isOrphan(_))) } + /** + * Checks the functions for missing values and the graph for cyclic dependencies and then runs the functions in the graph. + */ def run = { var isReady = true + var totalMissingValues = 0 for (function <- JavaConversions.asSet(jobGraph.edgeSet)) { function match { case cmd: CommandLineFunction => - val missingValues = cmd.missingValues - if (missingValues.size > 0) { - isReady = false - logger.error("Missing values for function: %s".format(cmd.commandLine)) - for (missing <- missingValues) + val missingFieldValues = cmd.missingFields + if (missingFieldValues.size > 0) { + totalMissingValues += missingFieldValues.size + logger.error("Missing %s values for function: %s".format(missingFieldValues.size, cmd.commandLine)) + for (missing <- missingFieldValues) logger.error(" " + missing) } case _ => } } + if (totalMissingValues > 0) { + isReady = false + } + val detector = new CycleDetector(jobGraph) if (detector.detectCycles) { logger.error("Cycles were detected in the graph:") @@ -75,11 +88,29 @@ class QGraph extends Logging { if (isReady || this.dryRun) (new TopologicalJobScheduler(this) with LsfJobRunner).runJobs + + if (totalMissingValues > 0) { + logger.error("Total missing values: " + totalMissingValues) + } + + if (isReady && this.dryRun) { + logger.info("Dry run completed successfully!") + logger.info("Re-run with \"-run\" to execute the functions.") + } } + /** + * Creates a new graph where if new edges are needed (for cyclic dependency checking) they can be automatically created using a generic MappingFunction. 
+ * @return A new graph + */ private def newGraph = new SimpleDirectedGraph[QNode, QFunction](new EdgeFactory[QNode, QFunction] { - def createEdge(input: QNode, output: QNode) = new MappingFunction(input.items, output.items)}) + def createEdge(input: QNode, output: QNode) = new MappingFunction(input.files, output.files)}) + /** + * Adds a generic QFunction to the graph. + * If the function is scatterable and the jobs request bsub, splits the job into parts and adds the parts instead. + * @param f Generic QFunction to add to the graph. + */ private def addFunction(f: QFunction): Unit = { try { f.freeze @@ -113,31 +144,53 @@ class QGraph extends Logging { } } - private def addCollectionInputs(value: Any): Unit = { - CollectionUtils.foreach(value, (item, collection) => - addMappingEdge(item, collection)) + /** + * Checks to see if the set of files has more than one file and if so adds input mappings between the set and the individual files. + * @param files Set to check. + */ + private def addCollectionInputs(files: Set[File]): Unit = { + if (files.size > 1) + for (file <- files) + addMappingEdge(Set(file), files) } - private def addCollectionOutputs(value: Any): Unit = { - CollectionUtils.foreach(value, (item, collection) => - addMappingEdge(collection, item)) + /** + * Checks to see if the set of files has more than one file and if so adds output mappings between the individual files and the set. + * @param files Set to check. 
+ */ + private def addCollectionOutputs(files: Set[File]): Unit = { + if (files.size > 1) + for (file <- files) + addMappingEdge(files, Set(file)) } - private def addMappingEdge(input: Any, output: Any) = { - val inputSet = asSet(input) - val outputSet = asSet(output) - val hasEdge = inputSet == outputSet || - jobGraph.getEdge(QNode(inputSet), QNode(outputSet)) != null || - jobGraph.getEdge(QNode(outputSet), QNode(inputSet)) != null + /** + * Adds a directed graph edge between the input set and the output set if there isn't a direct relationship between the two nodes already. + * @param input Input set of files. + * @param output Output set of files. + */ + private def addMappingEdge(input: Set[File], output: Set[File]) = { + val hasEdge = input == output || + jobGraph.getEdge(QNode(input), QNode(output)) != null || + jobGraph.getEdge(QNode(output), QNode(input)) != null if (!hasEdge) - addFunction(new MappingFunction(inputSet, outputSet)) + addFunction(new MappingFunction(input, output)) } - private def asSet(value: Any): Set[Any] = if (value.isInstanceOf[Set[_]]) value.asInstanceOf[Set[Any]] else Set(value) - + /** + * Returns true if the edge is an internal mapping edge. + * @param edge Edge to check. + * @return true if the edge is an internal mapping edge. + */ private def isMappingEdge(edge: QFunction) = edge.isInstanceOf[MappingFunction] + /** + * Returns true if the edge is mapping edge that is not needed because it does + * not direct input or output from a user generated CommandLineFunction. + * @param edge Edge to check. + * @return true if the edge is not needed in the graph. + */ private def isFiller(edge: QFunction) = { if (isMappingEdge(edge)) { if (jobGraph.outgoingEdgesOf(jobGraph.getEdgeTarget(edge)).size == 0) @@ -148,9 +201,19 @@ class QGraph extends Logging { } else false } + /** + * Returns true if the node is not connected to any edges. 
+ * @param node Node (set of files) to check + * @return true if this set of files is not needed in the graph. + */ private def isOrphan(node: QNode) = (jobGraph.incomingEdgesOf(node).size + jobGraph.outgoingEdgesOf(node).size) == 0 + /** + * Outputs the graph to a .dot file. + * http://en.wikipedia.org/wiki/DOT_language + * @param file Path to output the .dot file. + */ def renderToDot(file: java.io.File) = { val out = new java.io.FileWriter(file) diff --git a/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala b/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala index 01d3b814c..480c1c88f 100644 --- a/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/QNode.scala @@ -1,6 +1,9 @@ package org.broadinstitute.sting.queue.engine +import java.io.File + /** * Represents a state between QFunctions the directed acyclic QGraph + * @param files The set of files that represent this node state. */ -case class QNode (val items: Set[Any]) +case class QNode (val files: Set[File]) diff --git a/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala b/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala new file mode 100755 index 000000000..abffa3c08 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/engine/ShellJobRunner.scala @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.queue.engine + +import org.broadinstitute.sting.queue.util.{Logging, ShellJob} +import org.broadinstitute.sting.queue.function.CommandLineFunction + +/** + * Runs jobs one at a time locally + */ +trait ShellJobRunner extends Logging { + /** + * Runs the function on the local shell. + * @param function Command to run. + * @param qGraph graph that holds the job, and if this is a dry run. 
+ */ + def run(function: CommandLineFunction, qGraph: QGraph) = { + val job = new ShellJob + job.command = function.commandLine + job.workingDir = function.commandDirectory + job.outputFile = function.jobOutputFile + job.errorFile = function.jobErrorFile + + if (logger.isDebugEnabled) { + logger.debug(function.commandDirectory + " > " + function.commandLine) + } else { + logger.info(function.commandLine) + } + + if (!qGraph.dryRun) + job.run + } +} diff --git a/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala b/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala index 0831e184f..23a69846f 100755 --- a/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala +++ b/scala/src/org/broadinstitute/sting/queue/engine/TopologicalJobScheduler.scala @@ -7,21 +7,29 @@ import org.broadinstitute.sting.queue.util.Logging import org.broadinstitute.sting.queue.function._ /** - * Loops over the job graph running jobs as the edges are traversed + * Loops over the job graph running jobs as the edges are traversed. + * @param val The graph that contains the jobs to be run. */ abstract class TopologicalJobScheduler(private val qGraph: QGraph) - extends CommandLineRunner with DispatchJobRunner with Logging { + extends ShellJobRunner with DispatchJobRunner with Logging { protected val iterator = new TopologicalOrderIterator(qGraph.jobGraph) iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QFunction] { + /** + * As each edge is traversed, either dispatch the job or run it locally. + * @param event Event holding the edge that was passed. 
+ */ override def edgeTraversed(event: EdgeTraversalEvent[QNode, QFunction]) = event.getEdge match { - case f: DispatchFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph) + case f: CommandLineFunction if (qGraph.bsubAllJobs) => dispatch(f, qGraph) case f: CommandLineFunction => run(f, qGraph) case f: MappingFunction => /* do nothing for mapping functions */ } }) + /** + * Runs the jobs by traversing the graph. + */ def runJobs = { logger.info("Number of jobs: %s".format(qGraph.numJobs)) if (logger.isTraceEnabled) @@ -39,7 +47,6 @@ abstract class TopologicalJobScheduler(private val qGraph: QGraph) if (qGraph.bsubAllJobs && qGraph.bsubWaitJobs) { logger.info("Waiting for jobs to complete.") val wait = new DispatchWaitFunction - wait.properties = qGraph.properties wait.freeze dispatch(wait, qGraph) } diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala new file mode 100644 index 000000000..13ce477c4 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.queue.function.JarCommandLineFunction +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.queue.function.scattergather.GatherFunction + +/** + * Merges BAM files using Picards MergeSampFiles.jar. + * At the Broad the jar can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. 
Outside the Broad see http://picard.sourceforge.net/ + */ +class BamGatherFunction extends GatherFunction with JarCommandLineFunction { + @Argument(doc="Compression level 1-9", required=false) + var compressionLevel: Option[Int] = None + + override def commandLine = super.commandLine + "%s%s%s".format( + optional(" COMPRESSION_LEVEL=", compressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts)) +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala new file mode 100644 index 000000000..82ef24b2d --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamIndexFunction.scala @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.queue.function.CommandLineFunction +import java.io.File +import org.broadinstitute.sting.commandline.{Argument, Output, Input} + +/** + * Indexes a BAM file. + * By default uses samtools index. + * The syntax of the script must be: + * <bamIndexScript> <bamFile> <bamFileIndex> + */ +class BamIndexFunction extends CommandLineFunction { + @Argument(doc="BAM file script") + var bamIndexScript: String = "samtools index" + + @Input(doc="BAM file to index") + var bamFile: File = _ + + @Output(doc="BAM file index to output", required=false) + var bamFileIndex: File = _ + + /** + * Sets the bam file index to the bam file name + ".bai". 
+ */ + override def freezeFieldValues = { + super.freezeFieldValues + if (bamFileIndex == null && bamFile != null) + bamFileIndex = new File(bamFile.getPath + ".bai") + } + + def commandLine = "%s %s %s".format(bamIndexScript, bamFile, bamFileIndex) + + override def dotString = "Index: %s".format(bamFile.getName) +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala new file mode 100755 index 000000000..01de9c8f9 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/ContigScatterFunction.scala @@ -0,0 +1,8 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +/** + * Splits intervals by contig instead of evenly. + */ +class ContigScatterFunction extends IntervalScatterFunction { + splitIntervalsScript = "splitIntervalsByContig.py" +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala new file mode 100644 index 000000000..dfb94d48f --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -0,0 +1,16 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.commandline.Argument +import org.broadinstitute.sting.queue.function.scattergather.ScatterFunction + +/** + * An interval scatter function that allows the script to be swapped out. + * The syntax of the script must be: + * [.. 
] + */ +class IntervalScatterFunction extends ScatterFunction { + @Argument(doc="Interval split script") + var splitIntervalsScript: String = "splitIntervals.sh" + + def commandLine = "%s %s%s".format(splitIntervalsScript, originalInput, repeat(" ", scatterParts)) +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala new file mode 100644 index 000000000..bce054ba0 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala @@ -0,0 +1,14 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import java.io.File +import org.broadinstitute.sting.queue.function.FileProvider + +/** + * Used to provide -B rodBinding arguments to the GATK. + */ +case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider { + require(trackName != null, "RodBind trackName cannot be null") + require(trackType != null, "RodBind trackType cannot be null") + require(file != null, "RodBind file cannot be null") + override def toString = "%s,%s,%s".format(trackName, trackType, file) +} diff --git a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index d94ac998b..847103f10 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -1,74 +1,402 @@ package org.broadinstitute.sting.queue.function import org.broadinstitute.sting.queue.util._ -import java.lang.reflect.Field import java.lang.annotation.Annotation -import org.broadinstitute.sting.commandline.{Input, Output} +import org.broadinstitute.sting.commandline._ +import java.io.File +import collection.JavaConversions._ +import org.broadinstitute.sting.queue.function.scattergather.{SimpleTextGatherFunction, Gather} +import 
java.lang.management.ManagementFactory +import org.broadinstitute.sting.queue.QException -trait CommandLineFunction extends InputOutputFunction with DispatchFunction { - var properties = Map.empty[String, String] +/** + * A command line that will be run in a pipeline. + */ +trait CommandLineFunction extends QFunction with Logging { + def commandLine: String - def inputFieldsWithValues = inputFields.filter(hasFieldValue(_)) - def outputFieldsWithValues = outputFields.filter(hasFieldValue(_)) + /** Upper memory limit */ + var memoryLimit: Option[Int] = None + + /** Whether a job is restartable */ + var jobRestartable = true + + /** Directory to run the command in. */ + var commandDirectory: File = IOUtils.CURRENT_DIR + + /** Prefix for automatic job name creation */ + var jobNamePrefix: String = CommandLineFunction.processNamePrefix + + /** The name of the job */ + var jobName: String = _ + + /** Job project to run the command */ + var jobProject = "Queue" + + /** Job queue to run the command */ + var jobQueue = "broad" + + /** Temporary directory to write any files */ + var jobTempDir: File = new File(System.getProperty("java.io.tmpdir")) + + /** If true this function will run only if the jobs it is dependent on succeed. */ + var jobRunOnlyIfPreviousSucceed = true + + /** File to redirect any output. Defaults to <jobName>.out */ + @Output(doc="File to redirect any output", required=false) + @Gather(classOf[SimpleTextGatherFunction]) + var jobOutputFile: File = _ + + /** File to redirect any errors. Defaults to .out */ + @Output(doc="File to redirect any errors", required=false) + @Gather(classOf[SimpleTextGatherFunction]) + var jobErrorFile: File = _ + + /** The complete list of fields on this CommandLineFunction. */ + lazy val functionFields: List[ArgumentSource] = ParsingEngine.extractArgumentSources(this.getClass).toList + /** The @Input fields on this CommandLineFunction. 
*/ + lazy val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input])) + /** The @Output fields on this CommandLineFunction. */ + lazy val outputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Output])) + /** The @Argument fields on this CommandLineFunction. */ + lazy val argumentFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Argument])) /** - * Sets parameters from the arg map. + * Returns set of directories required to run the command. + * @return Set of directories required to run the command. */ - override def freeze = { - for ((name, value) <- properties) addOrUpdateWithStringValue(name, value) + def jobDirectories = { + var dirs = Set.empty[File] + dirs += commandDirectory + if (jobTempDir != null) + dirs += jobTempDir + dirs ++= inputs.map(_.getParentFile) + dirs ++= outputs.map(_.getParentFile) + dirs + } + + /** + * Returns the input files for this function. + * @return Set[File] inputs for this function. + */ + def inputs = getFieldFiles(inputFields) + + /** + * Returns the output files for this function. + * @return Set[File] outputs for this function. + */ + def outputs = getFieldFiles(outputFields) + + /** + * Gets the files from the fields. The fields must be a File, a FileProvider, or a List or Set of either. + * @param fields Fields to get files. + * @return Set[File] for the fields. + */ + private def getFieldFiles(fields: List[ArgumentSource]): Set[File] = { + var files = Set.empty[File] + for (field <- fields) + files ++= getFieldFiles(field) + files + } + + /** + * Gets the files from the field. The field must be a File, a FileProvider, or a List or Set of either. + * @param fields Field to get files. + * @return Set[File] for the field. 
+ */ + def getFieldFiles(field: ArgumentSource): Set[File] = { + var files = Set.empty[File] + CollectionUtils.foreach(getFieldValue(field), (fieldValue) => { + val file = fieldValueToFile(field, fieldValue) + if (file != null) + files += file + }) + files + } + + /** + * Gets the file from the field. The field must be a File or a FileProvider and not a List or Set. + * @param field Field to get the file. + * @return File for the field. + */ + def getFieldFile(field: ArgumentSource): File = + fieldValueToFile(field, getFieldValue(field)) + + /** + * Converts the field value to a file. The field must be a File or a FileProvider. + * @param field Field to get the file. + * @param value Value of the File or FileProvider or null. + * @return Null if value is null, otherwise the File. + * @throws QException if the value is not a File or FileProvider. + */ + private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match { + case file: File => file + case fileProvider: FileProvider => fileProvider.file + case null => null + case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or implement FileProvider: %s: %s".format(field.field, unknown)) + } + + /** + * Resets the field to the temporary directory. + * @param field Field to get and set the file. + * @param tempDir new root for the file. 
+ */ + def resetFieldFile(field: ArgumentSource, tempDir: File): File = { + getFieldValue(field) match { + case file: File => { + val newFile = IOUtils.resetParent(tempDir, file) + setFieldValue(field, newFile) + newFile + } + case fileProvider: FileProvider => { + fileProvider.file = IOUtils.resetParent(tempDir, fileProvider.file) + fileProvider.file + } + case null => null + case unknown => + throw new QException("Unable to set file from %s: %s".format(field, unknown)) + } + } + + /** + * The function description in .dot files + */ + override def dotString = jobName + " => " + commandLine + + /** + * Sets all field values and makes them canonical so that the graph can + * match the inputs of one function to the output of another using equals(). + */ + final override def freeze = { + freezeFieldValues + canonFieldValues super.freeze } + /** + * Sets all field values. + */ + def freezeFieldValues = { + if (jobName == null) + jobName = CommandLineFunction.nextJobName(jobNamePrefix) + + if (jobOutputFile == null) + jobOutputFile = new File(jobName + ".out") + + commandDirectory = IOUtils.subDir(IOUtils.CURRENT_DIR, commandDirectory) + } + + /** + * Makes all field values canonical so that the graph can match the + * inputs of one function to the output of another using equals(). + */ + def canonFieldValues = { + for (field <- this.functionFields) { + var fieldValue = this.getFieldValue(field) + fieldValue = CollectionUtils.updated(fieldValue, canon).asInstanceOf[AnyRef] + this.setFieldValue(field, fieldValue) + } + } + + /** + * Set value to a uniform value across functions. + * Base implementation changes any relative path to an absolute path. 
+ * @param value to be updated + * @return the modified value, or a copy if the value is immutable + */ + protected def canon(value: Any) = { + value match { + case file: File => absolute(file) + case fileProvider: FileProvider => fileProvider.file = absolute(fileProvider.file); fileProvider + case x => x + } + } + + /** + * Returns the absolute path to the file relative to the job command directory. + * @param file File to root relative to the command directory if it is not already absolute. + * @return The absolute path to file. + */ + private def absolute(file: File) = IOUtils.subDir(commandDirectory, file) + /** * Repeats parameters with a prefix/suffix if they are set otherwise returns "". * Skips null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString. + * @param prefix Command line prefix per parameter. + * @param params Traversable parameters. + * @param suffix Optional suffix per parameter. + * @param separator Optional separator per parameter. + * @param format Format string if the value has a value + * @return The generated string */ - protected def repeat(prefix: String, params: Seq[_], suffix: String = "", separator: String = "") = - params.filter(param => hasValue(param)).map(param => prefix + toValue(param) + suffix).mkString(separator) + protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", format: String = "%s") = + params.filter(param => hasValue(param)).map(param => prefix + toValue(param, format) + suffix).mkString(separator) /** * Returns parameter with a prefix/suffix if it is set otherwise returns "". * Does not output null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString. + * @param prefix Command line prefix per parameter. + * @param param Parameters to check for a value. + * @param suffix Optional suffix per parameter. 
+ * @param format Format string if the value has a value + * @return The generated string */ - protected def optional(prefix: String, param: Any, suffix: String = "") = - if (hasValue(param)) prefix + toValue(param) + suffix else "" + protected def optional(prefix: String, param: Any, suffix: String = "", format: String = "%s") = + if (hasValue(param)) prefix + toValue(param, format) + suffix else "" - def missingValues = { + /** + * Returns fields that do not have values which are required. + * @return List[String] names of fields missing values. + */ + def missingFields: List[String] = { val missingInputs = missingFields(inputFields, classOf[Input]) val missingOutputs = missingFields(outputFields, classOf[Output]) - missingInputs | missingOutputs + val missingArguments = missingFields(argumentFields, classOf[Argument]) + (missingInputs | missingOutputs | missingArguments).toList.sorted } - private def missingFields(fields: List[Field], annotation: Class[_ <: Annotation]) = { + /** + * Returns fields that do not have values which are required. + * @param sources Fields to check. + * @param annotation Annotation. + * @return Set[String] names of fields missing values. 
+ */ + private def missingFields(sources: List[ArgumentSource], annotation: Class[_ <: Annotation]): Set[String] = { var missing = Set.empty[String] - for (field <- fields) { - if (isRequired(field, annotation)) - if (!hasValue(ReflectionUtils.getValue(this, field))) - missing += field.getName + for (source <- sources) { + if (isRequired(source, annotation)) + if (!hasFieldValue(source)) + if (!exclusiveOf(source, annotation).exists(otherSource => hasFieldValue(otherSource))) + missing += "@%s: %s - %s".format(annotation.getSimpleName, source.field.getName, doc(source, annotation)) } missing } - private def isRequired(field: Field, annotationClass: Class[_ <: Annotation]) = - getAnnotationValue(field.getAnnotation(annotationClass), "required").asInstanceOf[Boolean] - - private def getAnnotationValue(annotation: Annotation, method: String) = - annotation.getClass.getMethod(method).invoke(annotation) - - protected def hasFieldValue(field: Field) = hasValue(this.getFieldValue(field)) - - private def hasValue(param: Any) = param match { - case null => false - case Nil => false - case None => false - case _ => true + /** + * Scala sugar type for checking annotation required and exclusiveOf. + */ + private type ArgumentAnnotation = { + /** + * Returns true if the field is required. + * @return true if the field is required. + */ + def required(): Boolean + /** + * Returns the comma separated list of fields that may be set instead of this field. + * @return the comma separated list of fields that may be set instead of this field. + */ + def exclusiveOf(): String + /** + * Returns the documentation for this field. + * @return the documentation for this field. + */ + def doc(): String } - private def toValue(param: Any): String = param match { - case null => "" - case Nil => "" - case None => "" - case Some(x) => x.toString - case x => x.toString + /** + * Returns the isRequired value from the field. + * @param field Field to check. + * @param annotation Annotation. 
+ * @return the isRequired value from the field annotation. + */ + private def isRequired(field: ArgumentSource, annotation: Class[_ <: Annotation]) = + ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].required + + /** + * Returns an array of ArgumentSources from functionFields listed in the exclusiveOf of the original field + * @param field Field to check. + * @param annotation Annotation. + * @return the Array[ArgumentSource] that may be set instead of the field. + */ + private def exclusiveOf(field: ArgumentSource, annotation: Class[_ <: Annotation]) = + ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].exclusiveOf + .split(",").map(_.trim).filter(_.length > 0) + .map(fieldName => functionFields.find(fieldName == _.field.getName) match { + case Some(x) => x + case None => throw new QException("Unable to find exclusion field %s on %s".format(fieldName, this.getClass.getSimpleName)) + }) + + /** + * Returns the doc value from the field. + * @param field Field to check. + * @param annotation Annotation. + * @return the doc value from the field annotation. + */ + private def doc(field: ArgumentSource, annotation: Class[_ <: Annotation]) = + ReflectionUtils.getAnnotation(field.field, annotation).asInstanceOf[ArgumentAnnotation].doc + + /** + * Returns true if the field has a value. + * @param source Field to check for a value. + * @return true if the field has a value. + */ + protected def hasFieldValue(source: ArgumentSource) = this.hasValue(this.getFieldValue(source)) + + /** + * Returns false if the value is null or an empty collection. + * @param value Value to test for null, or a collection to test if it is empty. + * @return false if the value is null, or false if the collection is empty, otherwise true. 
+ */ + private def hasValue(param: Any) = CollectionUtils.isNotNullOrNotEmpty(param) + + /** + * Returns "" if the value is null or an empty collection, otherwise return the value.toString. + * @param value Value to test for null, or a collection to test if it is empty. + * @param format Format string if the value has a value + * @return "" if the value is null, or "" if the collection is empty, otherwise the value.toString. + */ + private def toValue(param: Any, format: String): String = if (CollectionUtils.isNullOrEmpty(param)) "" else + param match { + case Some(x) => format.format(x) + case x => format.format(x) + } + + /** + * Gets the value of a field. + * @param source Field to get the value for. + * @return value of the field. + */ + def getFieldValue(source: ArgumentSource) = ReflectionUtils.getValue(invokeObj(source), source.field) + + /** + * Gets the value of a field. + * @param source Field to set the value for. + * @return value of the field. + */ + def setFieldValue(source: ArgumentSource, value: Any) = ReflectionUtils.setValue(invokeObj(source), source.field, value) + + /** + * Walks gets the fields in this object or any collections in that object + * recursively to find the object holding the field to be retrieved or set. + * @param source Field find the invoke object for. + * @return Object to invoke the field on. + */ + private def invokeObj(source: ArgumentSource) = source.parentFields.foldLeft[AnyRef](this)(ReflectionUtils.getValue(_, _)) +} + +/** + * A command line that will be run in a pipeline. + */ +object CommandLineFunction { + /** A semi-unique job prefix using the host name and the process id. */ + private val processNamePrefix = "Q-" + { + var prefix = ManagementFactory.getRuntimeMXBean.getName + val index = prefix.indexOf(".") + if (index >= 0) + prefix = prefix.substring(0, index) + prefix + } + + /** Job index counter for this run of Queue. */ + private var jobIndex = 0 + + /** + * Returns the next job name using the prefix. 
+ * @param prefix Prefix of the job name. + * @return the next job name. + */ + private def nextJobName(prefix: String) = { + jobIndex += 1 + prefix + "-" + jobIndex } } diff --git a/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala deleted file mode 100644 index eb70e31f7..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/DispatchFunction.scala +++ /dev/null @@ -1,93 +0,0 @@ -package org.broadinstitute.sting.queue.function - -import java.io.File -import java.lang.management.ManagementFactory -import org.broadinstitute.sting.queue.function.scattergather.{Gather, SimpleTextGatherFunction} -import org.broadinstitute.sting.queue.util.IOUtils -import org.broadinstitute.sting.commandline.{ClassType, Output, Input} - -trait DispatchFunction extends InputOutputFunction { - def commandLine: String - - @Input(doc="Upper memory limit", required=false) - @ClassType(classOf[Int]) - var memoryLimit: Option[Int] = None - - /** - * The directory where the command should run. 
- */ - @Input(doc="Directory to write any files", required=false) - var commandDirectory: File = IOUtils.CURRENT_DIR - - @Input(doc="Prefix for automatic job name creation", required=false) - var jobNamePrefix: String = _ - - @Input(doc="Job name to run on the farm", required=false) - var jobName: String = _ - - @Output(doc="File to redirect any output", required=false) - @Gather(classOf[SimpleTextGatherFunction]) - var jobOutputFile: File = _ - - @Output(doc="File to redirect any errors", required=false) - @Gather(classOf[SimpleTextGatherFunction]) - var jobErrorFile: File = _ - - @Input(doc="Job project to run the command", required=false) - var jobProject = "Queue" - - @Input(doc="Job queue to run the command", required=false) - var jobQueue = "broad" - - override def freeze = { - if (jobNamePrefix == null) - jobNamePrefix = DispatchFunction.processNamePrefix - - if (jobName == null) - jobName = DispatchFunction.nextJobName(jobNamePrefix) - - if (jobOutputFile == null) - jobOutputFile = new File(jobName + ".out") - - if (jobErrorFile == null) - jobErrorFile = new File(jobName + ".err") - - commandDirectory = IOUtils.absolute(IOUtils.CURRENT_DIR, commandDirectory) - - super.freeze - } - - override def dotString = jobName + " => " + commandLine - - /** - * Override the canon function to change any relative path to an absolute path. 
- */ - override protected def canon(value: Any) = { - value match { - case file: File => IOUtils.absolute(commandDirectory, file) - case x => super.canon(x) - } - } - - def absolute(file: File) = IOUtils.absolute(commandDirectory, file) - def temp(subDir: String) = IOUtils.sub(commandDirectory, jobName + "-" + subDir) - - override def toString = commandLine -} - -object DispatchFunction { - private val processNamePrefix = "Q-" + { - var prefix = ManagementFactory.getRuntimeMXBean.getName - val index = prefix.indexOf(".") - if (index >= 0) - prefix = prefix.substring(0, index) - prefix - } - - private var jobIndex = 0 - - private def nextJobName(prefix: String) = { - jobIndex += 1 - prefix + "-" + jobIndex - } -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala index 6bcafa87a..83e1557ea 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/DispatchWaitFunction.scala @@ -2,10 +2,14 @@ package org.broadinstitute.sting.queue.function import java.io.File +/** An internal class that is used by bsub to wait on all other jobs before exiting. */ class DispatchWaitFunction extends CommandLineFunction { + /** + * Returns the command line "echo". 
+ * @return echo + */ def commandLine = "echo" jobQueue = "short" jobOutputFile = File.createTempFile("Q-wait", ".out") - jobErrorFile = File.createTempFile("Q-wait", ".err") } diff --git a/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala b/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala new file mode 100644 index 000000000..b139cfff6 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/function/FileProvider.scala @@ -0,0 +1,11 @@ +package org.broadinstitute.sting.queue.function + +import java.io.File + +/** + * An trait for @Input or @Output CommandLineFunction fields that are not files, but have a File that can be get/set. + */ +trait FileProvider { + /** Gets/Sets the file. */ + var file: File +} diff --git a/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala deleted file mode 100644 index 5d686437b..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/InputOutputFunction.scala +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.queue.function - -import java.lang.reflect.Field -import org.broadinstitute.sting.queue.util._ -import org.broadinstitute.sting.commandline.{Input, Output} - -/** - * A function with @Inputs and @Outputs tagging fields that can be set by the user in a QScript - */ -trait InputOutputFunction extends QFunction with Cloneable { - def getFieldValue(field: Field) = ReflectionUtils.getValue(this, field) - def setFieldValue(field: Field, value: Any) = ReflectionUtils.setValue(this, field, value) - - def functionFields: List[Field] = inputFields ::: outputFields - def inputFields = ReflectionUtils.filterFields(fields, classOf[Input]) - def outputFields = ReflectionUtils.filterFields(fields, classOf[Output]) - - private lazy val fields = ReflectionUtils.getAllFields(this.getClass) - // TODO: Need to handle argument collections where field is not on THIS - def inputs = 
CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, inputFields)).toSet - def outputs = CollectionUtils.removeNullOrEmpty(ReflectionUtils.getFieldValues(this, outputFields)).toSet - - /** - * Sets a field value using the name of the field. - * Field must be annotated with @Input or @Output - * @return true if the value was found and set - */ - protected def addOrUpdateWithStringValue(name: String, value: String) = { - fields.find(_.getName == name) match { - case Some(field) => - val isInput = ReflectionUtils.hasAnnotation(field, classOf[Input]) - val isOutput = ReflectionUtils.hasAnnotation(field, classOf[Output]) - if (isInput || isOutput) { - ReflectionUtils.addOrUpdateWithStringValue(this, field, value) - } - true - // TODO: Need to handle argument collections where field is not on THIS - case None => false - } - } - - def cloneFunction() = clone.asInstanceOf[this.type] - // explicitly overriden so that trait function cloneFunction can use this.clone - override protected def clone = super.clone - - /** - * As the function is frozen, changes all fields to their canonical forms. - */ - override def freeze = { - for (field <- this.functionFields) - mapField(field, canon) - super.freeze - } - - def mapField(field: Field, f: Any => Any): Any = { - var fieldValue = this.getFieldValue(field) - fieldValue = CollectionUtils.updated(fieldValue, f).asInstanceOf[AnyRef] - this.setFieldValue(field, fieldValue) - fieldValue - } - - /** - * Set value to a uniform value across functions. 
- * The biggest example is file paths relative to the command directory in DispatchFunction - */ - protected def canon(value: Any): Any = value -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala deleted file mode 100644 index e525f58b6..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/IntervalFunction.scala +++ /dev/null @@ -1,8 +0,0 @@ -package org.broadinstitute.sting.queue.function - -import java.io.File - -trait IntervalFunction extends InputOutputFunction { - var referenceFile: File - var intervals: File -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala new file mode 100644 index 000000000..29d5d3ca7 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/function/JarCommandLineFunction.scala @@ -0,0 +1,15 @@ +package org.broadinstitute.sting.queue.function + +import org.broadinstitute.sting.commandline.Argument +import java.io.File + +/** + * Defines a command line function that runs from a jar file. + */ +trait JarCommandLineFunction extends CommandLineFunction { + @Argument(doc="jar") + var jarFile: File = _ + + def commandLine = "java%s -Djava.io.tmpdir=%s -jar %s" + .format(optional(" -Xmx", memoryLimit, "g"), jobTempDir, jarFile) +} diff --git a/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala index cd4b14246..a1d28df21 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/MappingFunction.scala @@ -1,9 +1,15 @@ package org.broadinstitute.sting.queue.function +import java.io.File + /** * Utility class to map a set of inputs to set of outputs. 
* The QGraph uses this function internally to map between user defined functions. */ -class MappingFunction(val inputs: Set[Any], val outputs: Set[Any]) extends QFunction { - override def toString = "" // For debugging +class MappingFunction(val inputs: Set[File], val outputs: Set[File]) extends QFunction { + /** + * For debugging purposes returns . + * @returns + */ + override def toString = "" } diff --git a/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index 491e4c887..68a4bf4bc 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -1,5 +1,7 @@ package org.broadinstitute.sting.queue.function +import java.io.File + /** * The base interface for all functions in Queue. * Inputs and outputs are specified as Sets of values. @@ -16,12 +18,15 @@ trait QFunction { /** * Set of inputs for this function. */ - def inputs: Set[Any] + def inputs: Set[File] /** * Set of outputs for this function. 
*/ - def outputs: Set[Any] + def outputs: Set[File] + /** + * The function description in .dot files + */ def dotString = "" } diff --git a/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala deleted file mode 100644 index b509fceb9..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/gatk/GatkFunction.scala +++ /dev/null @@ -1,38 +0,0 @@ -package org.broadinstitute.sting.queue.function.gatk - -import java.io.File -import org.broadinstitute.sting.queue.function.IntervalFunction -import org.broadinstitute.sting.queue.function.scattergather.{Scatter, ScatterGatherableFunction, IntervalScatterFunction} -import org.broadinstitute.sting.commandline.{ClassType, Input} -import org.apache.log4j.Level - -trait GatkFunction extends ScatterGatherableFunction with IntervalFunction { - @Input(doc="Temporary directory to write any files", required=false) - var javaTmpDir: String = _ - - @Input(doc="GATK jar") - var gatkJar: String = _ - - @Input(doc="Reference fasta") - var referenceFile: File = _ - - @Input(doc="Bam files", required=false) - @ClassType(classOf[File]) - var bamFiles: List[File] = Nil - - @Input(doc="Intervals", required=false) - @Scatter(classOf[IntervalScatterFunction]) - var intervals: File = _ - - @Input(doc="DBSNP", required=false) - var dbsnp: File = _ - - @Input(doc="Logging level", required=false) - var gatkLoggingLevel: String = _ - - protected def gatkCommandLine(walker: String) = - "java%s%s -jar %s -T %s -R %s%s%s%s%s " - .format(optional(" -Xmx", memoryLimit, "g"), optional(" -Djava.io.tmpdir=", javaTmpDir), - gatkJar, walker, referenceFile, repeat(" -I ", bamFiles), optional(" -l ", gatkLoggingLevel), - optional(" -D ", dbsnp), optional(" -L ", intervals)) -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala 
b/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala deleted file mode 100644 index b433ee10e..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/BamGatherFunction.scala +++ /dev/null @@ -1,17 +0,0 @@ -package org.broadinstitute.sting.queue.function.scattergather - -import java.io.File -import org.broadinstitute.sting.commandline.Input - -class BamGatherFunction extends GatherFunction { - type GatherType = File - - @Input(doc="Picard MergeSamFiles.jar. At the Broad this can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the broad see http://picard.sourceforge.net/") - var picardMergeSamFilesJar: String = _ - - @Input(doc="Compression level 1-9", required=false) - var picardMergeCompressionLevel: Option[Int] = None - - def commandLine = "java -jar %s%s%s%s".format(picardMergeSamFilesJar, - optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " AS=true VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts)) -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala index a3ebc953b..cd6b9bf38 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CleanupTempDirsFunction.scala @@ -1,15 +1,24 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.queue.function.CommandLineFunction -import org.broadinstitute.sting.commandline.Input import java.io.File +import org.broadinstitute.sting.commandline.{Argument, Input} +/** + * Removes the temporary directories for scatter / gather. + * The script can be changed by setting rmdirScript. + * By default uses rm -rf. + * The format of the call is [.. 
] + */ class CleanupTempDirsFunction extends CommandLineFunction { @Input(doc="Original outputs of the gather functions") - var originalOutputs: Set[Any] = Set.empty[Any] + var originalOutputs: Set[File] = Set.empty[File] @Input(doc="Temporary directories to be deleted") var tempDirectories: List[File] = Nil - def commandLine = "rm -rf%s".format(repeat(" '", tempDirectories, "'")) + @Argument(doc="rmdir script or command") + var rmdirScript = "rm -rf" + + def commandLine = "%s%s".format(rmdirScript, repeat(" '", tempDirectories, "'")) } diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala deleted file mode 100755 index 613c17e35..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ContigScatterFunction.scala +++ /dev/null @@ -1,21 +0,0 @@ -package org.broadinstitute.sting.queue.function.scattergather - -import java.io.File -import org.broadinstitute.sting.commandline.Input -import org.broadinstitute.sting.queue.function.IntervalFunction - -class ContigScatterFunction extends ScatterFunction { - type ScatterType = File - - @Input(doc="Reference file to scatter") - var referenceFile: File = _ - - override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = { - val command = originalFunction.asInstanceOf[IntervalFunction] - referenceFile = command.referenceFile - super.setOriginalFunction(originalFunction) - } - - // TODO: Use the reference file for "all" - def commandLine = "splitIntervalsByContig.py %s%s".format(originalInput, repeat(" ", scatterParts)) -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala index de1a16652..67161169b 100644 --- 
a/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/CreateTempDirsFunction.scala @@ -2,25 +2,28 @@ package org.broadinstitute.sting.queue.function.scattergather import java.io.File import org.broadinstitute.sting.queue.function.CommandLineFunction -import org.broadinstitute.sting.commandline.{Output, Input} +import org.broadinstitute.sting.commandline.{Argument, Output, Input} +/** + * Creates the temporary directories for scatter / gather. + * The script can be changed by setting mkdirScript. + * By default uses mkdir -pv + * The format of the call is [.. ] + */ class CreateTempDirsFunction extends CommandLineFunction { @Input(doc="Original inputs to the scattered function") - var originalInputs: Set[Any] = Set.empty[Any] + var originalInputs: Set[File] = Set.empty[File] @Output(doc="Temporary directories to create") var tempDirectories: List[File] = Nil - @Input(doc="Sleep seconds", required=false) - var mkdirSleepSeconds: Option[Int] = None + @Argument(doc="mkdir script or command") + var mkdirScript = "mkdir -pv" - // TODO: After port of LSF submitter use -cwd

instead of trying to run from the directory - // For now, create the directory so that BroadCore can run bsub from it -kshakir July 27, 2010 on chartl's computer + def commandLine = "%s%s".format(mkdirScript, repeat(" '", tempDirectories, "'")) - override def freeze = { - super.freeze - tempDirectories.foreach(_.mkdirs) - } - - def commandLine = "mkdir -pv%s%s".format(repeat(" '", tempDirectories, "'"), optional(" && sleep ", mkdirSleepSeconds)) + /** + * This function is creating the directories, so returns just this command directory. + */ + override def jobDirectories = Set(commandDirectory) } diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala deleted file mode 100644 index 6a36236ce..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/FixMatesGatherFunction.scala +++ /dev/null @@ -1,17 +0,0 @@ -package org.broadinstitute.sting.queue.function.scattergather - -import java.io.File -import org.broadinstitute.sting.commandline.Input - -class FixMatesGatherFunction extends GatherFunction { - type GatherType = File - - @Input(doc="Picard FixMateInformation.jar. At the Broad this can be found at /seq/software/picard/current/bin/FixMateInformation.jar. 
Outside the broad see http://picard.sourceforge.net/") - var picardFixMatesJar: String = _ - - @Input(doc="Compression level 1-9", required=false) - var picardMergeCompressionLevel: Option[Int] = None - - def commandLine = "java -Djava.io.tmpdir=/broad/shptmp/queue -jar %s%s%s%s".format(picardFixMatesJar, - optional(" COMPRESSION_LEVEL=", picardMergeCompressionLevel), " VALIDATION_STRINGENCY=SILENT SO=coordinate OUTPUT=" + originalOutput, repeat(" INPUT=", gatherParts)) -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala index 3bced51ee..f5886865a 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/GatherFunction.scala @@ -1,20 +1,31 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.queue.function.{CommandLineFunction} -import org.broadinstitute.sting.commandline.{Input, Output} +import java.io.File +import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output} /** * Base class for Gather command line functions. - * NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits. */ -abstract class GatherFunction extends CommandLineFunction { - type GatherType - +trait GatherFunction extends CommandLineFunction { @Input(doc="Parts to gather back into the original output") - var gatherParts: List[GatherType] = Nil + var gatherParts: List[File] = Nil @Output(doc="The original output of the scattered function") - var originalOutput: GatherType = _ + var originalOutput: File = _ - def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {} + /** + * Sets the original function used to create this scatter function. + * @param originalFunction The ScatterGatherableFunction. 
+ * @param gatherField The field being gathered. + */ + def setOriginalFunction(originalFunction: ScatterGatherableFunction, gatherField: ArgumentSource) = {} + + /** + * Sets the clone function creating one of the inputs for this gather function. + * @param cloneFunction The clone of the ScatterGatherableFunction. + * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. + * @param gatherField The field to be gathered. + */ + def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, gatherField: ArgumentSource) = {} } diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala deleted file mode 100644 index 8408622c0..000000000 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/IntervalScatterFunction.scala +++ /dev/null @@ -1,21 +0,0 @@ -package org.broadinstitute.sting.queue.function.scattergather - -import java.io.File -import org.broadinstitute.sting.commandline.Input -import org.broadinstitute.sting.queue.function.IntervalFunction - -class IntervalScatterFunction extends ScatterFunction { - type ScatterType = File - - @Input(doc="Reference file to scatter") - var referenceFile: File = _ - - override def setOriginalFunction(originalFunction: ScatterGatherableFunction) = { - val command = originalFunction.asInstanceOf[IntervalFunction] - referenceFile = command.referenceFile - super.setOriginalFunction(originalFunction) - } - - // TODO: Use the reference file for "all" - def commandLine = "splitIntervals.sh %s%s".format(originalInput, repeat(" ", scatterParts)) -} diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala index 05320ccb8..b0a8ab794 100644 --- 
a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterFunction.scala @@ -2,23 +2,33 @@ package org.broadinstitute.sting.queue.function.scattergather import org.broadinstitute.sting.queue.function.CommandLineFunction import java.io.File -import org.broadinstitute.sting.commandline.{Input, Output} +import org.broadinstitute.sting.commandline.{ArgumentSource, Input, Output} /** * Base class for Scatter command line functions. - * NOTE: Using an abstract class instead of a trait due to scala parameterized type erasure on traits. */ -abstract class ScatterFunction extends CommandLineFunction { - type ScatterType - +trait ScatterFunction extends CommandLineFunction { @Input(doc="Original input to scatter") - var originalInput: ScatterType = _ + var originalInput: File = _ + + @Output(doc="Scattered parts of the original input, one per temp directory") + var scatterParts: List[File] = Nil @Input(doc="Temporary directories for each scatter part") var tempDirectories: List[File] = Nil - @Output(doc="Scattered parts of the original input, one per temp directory") - var scatterParts: List[ScatterType] = Nil + /** + * Sets the original function used to create this scatter function. + * @param originalFunction The ScatterGatherableFunction. + * @param scatterField The field being scattered. + */ + def setOriginalFunction(originalFunction: ScatterGatherableFunction, scatterField: ArgumentSource) = {} - def setOriginalFunction(originalFunction: ScatterGatherableFunction) = {} + /** + * Sets the clone function using one of the outputs of this scatter function. + * @param cloneFunction The clone of the ScatterGatherableFunction. + * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. + * @param scatterField The field being scattered. 
+ */ + def setCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int, scatterField: ArgumentSource) = {} } diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala index a70263dd0..aa54f5672 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala @@ -1,141 +1,367 @@ package org.broadinstitute.sting.queue.function.scattergather -import org.broadinstitute.sting.queue.function.CommandLineFunction -import java.lang.reflect.Field import java.io.File import org.broadinstitute.sting.queue.util._ -import org.broadinstitute.sting.commandline.Input +import org.broadinstitute.sting.commandline.ArgumentSource +import org.broadinstitute.sting.queue.function.CommandLineFunction +import com.rits.cloning.Cloner +/** + * A function that can be run faster by splitting it up into pieces and then joining together the results. + */ trait ScatterGatherableFunction extends CommandLineFunction { - @Input(doc="Number of parts to scatter the function into") + /** Number of parts to scatter the function into" */ var scatterCount: Int = 1 - def scatterField = this.inputFields.find(field => ReflectionUtils.hasAnnotation(field, classOf[Scatter])).get + /** scatter gather directory */ + var scatterGatherDirectory: File = _ - def scatterGatherable = { - if (scatterCount < 2) - false - else if (!hasFieldValue(scatterField)) - false - else - true - } + /** cleanup temporary directories */ + var cleanupTempDirectories = false - def generateFunctions() = ScatterGatherableFunction.generateFunctions(this) -} + /** Class to use for creating temporary directories. Defaults to CreateTempDirsFunction. 
*/ + var createTempDirsClass: Class[_ <: CreateTempDirsFunction] = _ -object ScatterGatherableFunction { - private def generateFunctions(originalFunction: ScatterGatherableFunction) = { + /** Class to use for scattering. Defaults to the annotation used in the @Scatter tag. */ + var scatterClass: Class[_ <: ScatterFunction] = _ + + /** + * Function that returns the class to use for gathering a directory. If it returns null then @Gather annotation will be used. + * @param gatherField Field that is to be gathered. + * @return The class of the GatherFunction to be used or null. + */ + var gatherClass: PartialFunction[ArgumentSource, Class[_ <: GatherFunction]] = _ + + /** Class to use for removing temporary directories. Defaults to CleanupTempDirsFunction. */ + var cleanupTempDirsClass: Class[_ <: CleanupTempDirsFunction] = _ + + /** + * Allows external modification of the CreateTempDirsFunction that will create the temporary directories. + * @param initializeFunction The function that will create the temporary directories. + * @param inputFields The input fields that the original function was dependent on. + */ + var setupInitializeFunction: PartialFunction[(CreateTempDirsFunction, List[ArgumentSource]), Unit] = _ + + /** + * Allows external modification of the ScatterFunction that will create the scatter pieces in the temporary directories. + * @param scatterFunction The function that will create the scatter pieces in the temporary directories. + * @param scatterField The input field being scattered. + */ + var setupScatterFunction: PartialFunction[(ScatterFunction, ArgumentSource), Unit] = _ + + /** + * Allows external modification of the GatherFunction that will collect the gather pieces in the temporary directories. + * @param gatherFunction The function that will merge the gather pieces from the temporary directories. + * @param gatherField The output field being gathered. 
+ */ + var setupGatherFunction: PartialFunction[(GatherFunction, ArgumentSource), Unit] = _ + + /** + * Allows external modification of the cloned function. + * @param cloneFunction The clone of this ScatterGatherableFunction + * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. + */ + var setupCloneFunction: PartialFunction[(ScatterGatherableFunction, Int), Unit] = _ + + /** + * Allows external modification of the CleanupTempDirsFunction that will remove the temporary directories. + * @param cleanupFunction The function that will remove the temporary directories. + * @param gatherFunctions The functions that will gather up the original output fields. + * @param outputFields The output fields that the original function was dependent on. + */ + var setupCleanupFunction: PartialFunction[(CleanupTempDirsFunction, Map[ArgumentSource, GatherFunction], List[ArgumentSource]), Unit] = _ + + /** + * Returns true if the function is ready to be scatter / gathered. + * The base implementation checks if the scatter count is greater than one, + * and that the scatter field has a value. + * @return true if the function is ready to be scatter / gathered. + */ + def scatterGatherable = this.scatterCount > 1 && hasFieldValue(this.scatterField) + + /** + * Returns a list of scatter / gather and clones of this function + * that can be run in parallel to produce the same output as this + * command line function. + * @return List[CommandLineFunction] to run instead of this function. 
+ */ + def generateFunctions() = { var functions = List.empty[CommandLineFunction] var tempDirectories = List.empty[File] - // Create a function that will remove any temporary items - var cleanupFunction = new CleanupTempDirsFunction - cleanupFunction.properties = originalFunction.properties - cleanupFunction.jobNamePrefix = originalFunction.jobNamePrefix - cleanupFunction.commandDirectory = originalFunction.commandDirectory - - // Find the field with @Scatter and its value - var scatterField = originalFunction.scatterField - val originalValue = originalFunction.getFieldValue(scatterField) + // Only depend on input fields that have a value + val inputFieldsWithValues = this.inputFields.filter(hasFieldValue(_)) + // Only gather up fields that will have a value + val outputFieldsWithValues = this.outputFields.filter(hasFieldValue(_)) // Create the scatter function based on @Scatter - val scatterFunction = getScatterFunction(scatterField) - scatterFunction.setOriginalFunction(originalFunction) - scatterFunction.properties = originalFunction.properties - scatterFunction.jobNamePrefix = originalFunction.jobNamePrefix - scatterFunction.commandDirectory = originalFunction.temp("scatter-" + scatterField.getName) - scatterFunction.originalInput = originalValue.asInstanceOf[scatterFunction.ScatterType] + val scatterFunction = this.newScatterFunction(this.scatterField) + initScatterFunction(scatterFunction, this.scatterField) tempDirectories :+= scatterFunction.commandDirectory functions :+= scatterFunction // Create the gather functions for each output field - var gatherFunctions = Map.empty[Field, GatherFunction] - for (outputField <- originalFunction.outputFieldsWithValues) { - - // Create the gather function based on @Gather - val gatherFunction = getGatherFunction(outputField) - gatherFunction.setOriginalFunction(originalFunction) - gatherFunction.properties = originalFunction.properties - gatherFunction.jobNamePrefix = originalFunction.jobNamePrefix - 
gatherFunction.commandDirectory = originalFunction.temp("gather-" + outputField.getName) - - val gatheredValue = originalFunction.getFieldValue(outputField).asInstanceOf[gatherFunction.GatherType] - gatherFunction.originalOutput = gatheredValue - + var gatherFunctions = Map.empty[ArgumentSource, GatherFunction] + for (gatherField <- outputFieldsWithValues) { + val gatherFunction = this.newGatherFunction(gatherField) + initGatherFunction(gatherFunction, gatherField) tempDirectories :+= gatherFunction.commandDirectory - cleanupFunction.originalOutputs += gatheredValue - functions :+= gatherFunction - - gatherFunctions += outputField -> gatherFunction + gatherFunctions += gatherField -> gatherFunction } // Create the clone functions for running the parallel jobs var cloneFunctions = List.empty[CommandLineFunction] - for (i <- 1 to originalFunction.scatterCount) { - val cloneFunction = newFunctionClone(originalFunction) + for (i <- 1 to this.scatterCount) { + val cloneFunction = this.newCloneFunction() + initCloneFunction(cloneFunction, i) cloneFunctions :+= cloneFunction + tempDirectories :+= cloneFunction.commandDirectory - val tempDir = originalFunction.temp("temp-"+i) - cloneFunction.commandDirectory = tempDir - tempDirectories :+= tempDir - - // Reset the input of the clone to the the temp dir and add it as an output of the scatter - var scatterPart = CollectionUtils.updated(originalValue, resetToTempDir(tempDir)) - scatterFunction.scatterParts :+= scatterPart.asInstanceOf[scatterFunction.ScatterType] - cloneFunction.setFieldValue(scatterField, scatterPart) - - // For each each output field, change value to the temp dir and feed it into the gatherer - for (outputField <- originalFunction.outputFields) { - val gatherFunction = gatherFunctions(outputField) - val gatherPart = cloneFunction.mapField(outputField, resetToTempDir(tempDir)) - gatherFunction.gatherParts :+= gatherPart.asInstanceOf[gatherFunction.GatherType] - } + bindCloneFunctionScatter(scatterFunction, 
this.scatterField, cloneFunction, i) + // For each each output field, change value to the scatterGatherTempDir dir and feed it into the gatherer + for (gatherField <- outputFieldsWithValues) + bindCloneFunctionGather(gatherFunctions(gatherField), gatherField, cloneFunction, i) } - functions = cloneFunctions ::: functions + functions ++= cloneFunctions - // Create a function to create all of the temp directories. + // Create a function to create all of the scatterGatherTempDir directories. // All of its inputs are the inputs of the original function. - val initializeFunction = new CreateTempDirsFunction - initializeFunction.properties = originalFunction.properties - initializeFunction.jobNamePrefix = originalFunction.jobNamePrefix - initializeFunction.commandDirectory = originalFunction.commandDirectory + val initializeFunction = this.newInitializeFunction() + initInitializeFunction(initializeFunction, inputFieldsWithValues) - for (inputField <- originalFunction.inputFieldsWithValues) - initializeFunction.originalInputs += originalFunction.getFieldValue(inputField) + // Create a function that will remove any temporary items + // All of its inputs are the outputs of the original function. + var cleanupFunction = newCleanupFunction() + initCleanupFunction(cleanupFunction, gatherFunctions, outputFieldsWithValues) + // Set the temporary directories, for the initialize function as outputs for scatter and cleanup as inputs. 
initializeFunction.tempDirectories = tempDirectories scatterFunction.tempDirectories = tempDirectories cleanupFunction.tempDirectories = tempDirectories functions +:= initializeFunction - functions :+= cleanupFunction + if (this.cleanupTempDirectories) + functions :+= cleanupFunction // Return all the various functions we created functions } - private def resetToTempDir(tempDir: File): Any => Any = { - (any: Any) => { - any match { - case file: File => IOUtils.reset(tempDir, file) - case x => x - } - } + /** + * Sets the scatter gather directory to the command directory if it is not already set. + */ + override def freezeFieldValues = { + super.freezeFieldValues + if (this.scatterGatherDirectory == null) + this.scatterGatherDirectory = this.commandDirectory } - private def getScatterFunction(inputField: Field) = - ReflectionUtils.getAnnotation(inputField, classOf[Scatter]).value.newInstance.asInstanceOf[ScatterFunction] + /** + * Retrieves the scatter field from the first field that has the annotation @Scatter. + */ + protected lazy val scatterField = + this.inputFields.find(field => ReflectionUtils.hasAnnotation(field.field, classOf[Scatter])).get - private def getGatherFunction(outputField: Field) = - ReflectionUtils.getAnnotation(outputField, classOf[Gather]).value.newInstance.asInstanceOf[GatherFunction] + /** + * Creates a new initialize CreateTempDirsFunction that will create the temporary directories. + * @return A CreateTempDirsFunction that will create the temporary directories. + */ + protected def newInitializeFunction(): CreateTempDirsFunction = { + if (createTempDirsClass != null) + this.createTempDirsClass.newInstance + else + new CreateTempDirsFunction + } - private def newFunctionClone(originalFunction: ScatterGatherableFunction) = { - val cloneFunction = originalFunction.cloneFunction.asInstanceOf[ScatterGatherableFunction] + /** + * Initializes the CreateTempDirsFunction that will create the temporary directories. 
+ * The initializeFunction jobNamePrefix is set so that the CreateTempDirsFunction runs with the same prefix as this ScatterGatherableFunction. + * The initializeFunction commandDirectory is set so that the function runs in the directory as this ScatterGatherableFunction. + * The initializeFunction is modified to become dependent on the input files for this ScatterGatherableFunction. + * Calls setupInitializeFunction with initializeFunction. + * @param initializeFunction The function that will create the temporary directories. + * @param inputFields The input fields that the original function was dependent on. + */ + protected def initInitializeFunction(initializeFunction: CreateTempDirsFunction, inputFields: List[ArgumentSource]) = { + initializeFunction.jobNamePrefix = this.jobNamePrefix + initializeFunction.commandDirectory = this.commandDirectory + for (inputField <- inputFields) + initializeFunction.originalInputs ++= this.getFieldFiles(inputField) + if (this.setupInitializeFunction != null) + if (this.setupInitializeFunction.isDefinedAt(initializeFunction, inputFields)) + this.setupInitializeFunction(initializeFunction, inputFields) + } + + /** + * Creates a new ScatterFunction for the scatterField. + * @param scatterField Field that defined @Scatter. + * @return A ScatterFunction instantiated from @Scatter or scatterClass if scatterClass was set on this ScatterGatherableFunction. + */ + protected def newScatterFunction(scatterField: ArgumentSource): ScatterFunction = { + var scatterClass = this.scatterClass + if (scatterClass == null) + scatterClass = ReflectionUtils.getAnnotation(scatterField.field, classOf[Scatter]) + .value.asSubclass(classOf[ScatterFunction]) + scatterClass.newInstance.asInstanceOf[ScatterFunction] + } + + /** + * Initializes the ScatterFunction created by newScatterFunction() that will create the scatter pieces in the temporary directories. 
+ * The scatterFunction jobNamePrefix is set so that the ScatterFunction runs with the same prefix as this ScatterGatherableFunction. + * The scatterFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory. + * The scatterFunction has it's originalInput set with the file to be scattered into scatterCount pieces. + * Calls scatterFunction.setOriginalFunction with this ScatterGatherableFunction. + * Calls setupScatterFunction with scatterFunction. + * @param scatterFunction The function that will create the scatter pieces in the temporary directories. + * @param scatterField The input field being scattered. + */ + protected def initScatterFunction(scatterFunction: ScatterFunction, scatterField: ArgumentSource) = { + scatterFunction.jobNamePrefix = this.jobNamePrefix + scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter-" + scatterField.field.getName) + scatterFunction.originalInput = this.getFieldFile(scatterField) + scatterFunction.setOriginalFunction(this, scatterField) + if (this.setupScatterFunction != null) + if (this.setupScatterFunction.isDefinedAt(scatterFunction, scatterField)) + this.setupScatterFunction(scatterFunction, scatterField) + } + + /** + * Creates a new GatherFunction for the gatherField. + * @param gatherField Field that defined @Gather. + * @return A GatherFunction instantiated from @Gather. 
+ */ + protected def newGatherFunction(gatherField: ArgumentSource) : GatherFunction = { + var gatherClass: Class[_ <: GatherFunction] = null + if (this.gatherClass != null) + if (this.gatherClass.isDefinedAt(gatherField)) + gatherClass = this.gatherClass(gatherField) + if (gatherClass == null) + gatherClass = ReflectionUtils.getAnnotation(gatherField.field, classOf[Gather]) + .value.asSubclass(classOf[GatherFunction]) + gatherClass.newInstance.asInstanceOf[GatherFunction] + } + + /** + * Initializes the GatherFunction created by newGatherFunction() that will collect the gather pieces in the temporary directories. + * The gatherFunction jobNamePrefix is set so that the GatherFunction runs with the same prefix as this ScatterGatherableFunction. + * The gatherFunction commandDirectory is set so that the function runs from a temporary directory under the scatterDirectory. + * The gatherFunction has it's originalOutput set with the file to be gathered from the scatterCount pieces. + * Calls the gatherFunction.setOriginalFunction with this ScatterGatherableFunction. + * Calls setupGatherFunction with gatherFunction. + * @param gatherFunction The function that will merge the gather pieces from the temporary directories. + * @param gatherField The output field being gathered. + */ + protected def initGatherFunction(gatherFunction: GatherFunction, gatherField: ArgumentSource) = { + gatherFunction.jobNamePrefix = this.jobNamePrefix + gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName) + gatherFunction.originalOutput = this.getFieldFile(gatherField) + gatherFunction.setOriginalFunction(this, gatherField) + if (this.setupGatherFunction != null) + if (this.setupGatherFunction.isDefinedAt(gatherFunction, gatherField)) + this.setupGatherFunction(gatherFunction, gatherField) + } + + /** + * Creates a new clone of this ScatterGatherableFunction, setting the scatterCount to 1 so it doesn't infinitely scatter. 
+ * @return A clone of this ScatterGatherableFunction + */ + protected def newCloneFunction(): ScatterGatherableFunction = { + val cloneFunction = ScatterGatherableFunction.cloner.deepClone(this) // Make sure clone doesn't get scattered cloneFunction.scatterCount = 1 cloneFunction } + + /** + * Initializes the cloned function created by newCloneFunction() by setting it's commandDirectory to a temporary directory under scatterDirectory. + * Calls setupCloneFunction with cloneFunction. + * @param cloneFunction The clone of this ScatterGatherableFunction + * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. + */ + protected def initCloneFunction(cloneFunction: ScatterGatherableFunction, index: Int) = { + cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+index) + if (this.setupCloneFunction != null) + if (this.setupCloneFunction.isDefinedAt(cloneFunction, index)) + this.setupCloneFunction(cloneFunction, index) + } + + /** + * Joins a piece of the ScatterFunction output to the cloned function's input. + * The input of the clone is changed to be in the output directory of the clone. + * The scatter function piece is added as an output of the scatterFunction. + * The clone function's original input is changed to use the piece from the output directory. + * Finally the scatterFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction. + * @param scatterFunction Function that will create the pieces including the piece that will go to cloneFunction. + * @param scatterField The field to be scattered. + * @param cloneFunction Clone of this ScatterGatherableFunction. + * @param index The one based index (from 1..scatterCount inclusive) of the scatter piece. 
+ */ + protected def bindCloneFunctionScatter(scatterFunction: ScatterFunction, scatterField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = { + // Reset the input of the clone to the the scatterGatherTempDir dir and add it as an output of the scatter + val scatterPart = IOUtils.resetParent(cloneFunction.commandDirectory, scatterFunction.originalInput) + scatterFunction.scatterParts :+= scatterPart + cloneFunction.setFieldValue(scatterField, scatterPart) + scatterFunction.setCloneFunction(cloneFunction, index, scatterField) + } + + /** + * Joins the cloned function's output as a piece of the GatherFunction's input. + * Finally the scatterFunction.setCloneFunction is called with the clone of this ScatterGatherableFunction. + * @param cloneFunction Clone of this ScatterGatherableFunction. + * @param gatherFunction Function that will create the pieces including the piece that will go to cloneFunction. + * @param gatherField The field to be gathered. + */ + protected def bindCloneFunctionGather(gatherFunction: GatherFunction, gatherField: ArgumentSource, cloneFunction: ScatterGatherableFunction, index: Int) = { + val gatherPart = cloneFunction.resetFieldFile(gatherField, cloneFunction.commandDirectory) + gatherFunction.gatherParts :+= gatherPart + gatherFunction.setCloneFunction(cloneFunction, index, gatherField) + } + + /** + * Creates a new function that will remove the temporary directories. + * @return A CleanupTempDirs function that will remove the temporary directories. + */ + protected def newCleanupFunction(): CleanupTempDirsFunction = { + if (cleanupTempDirsClass != null) + this.cleanupTempDirsClass.newInstance + else + new CleanupTempDirsFunction + } + + /** + * Initializes the CleanupTempDirsFunction created by newCleanupFunction() that will remove the temporary directories. + * The cleanupFunction jobNamePrefix is set so that the CleanupTempDirsFunction runs with the same prefix as this ScatterGatherableFunction. 
+ * The cleanupFunction commandDirectory is set so that the function runs in the directory as this ScatterGatherableFunction. + * The initializeFunction is modified to become dependent on the output files for this ScatterGatherableFunction. + * Calls setupCleanupFunction with cleanupFunction. + * @param cleanupFunction The function that will remove the temporary directories. + * @param gatherFunctions The functions that will gather up the original output fields. + * @param outputFields The output fields that the original function was dependent on. + */ + protected def initCleanupFunction(cleanupFunction: CleanupTempDirsFunction, gatherFunctions: Map[ArgumentSource, GatherFunction], outputFields: List[ArgumentSource]) = { + cleanupFunction.jobNamePrefix = this.jobNamePrefix + cleanupFunction.commandDirectory = this.commandDirectory + for (gatherField <- outputFields) + cleanupFunction.originalOutputs += gatherFunctions(gatherField).originalOutput + if (this.setupCleanupFunction != null) + if (this.setupCleanupFunction.isDefinedAt(cleanupFunction, gatherFunctions, outputFields)) + this.setupCleanupFunction(cleanupFunction, gatherFunctions, outputFields) + } + + /** + * Returns a temporary directory under this scatter gather directory. + * @param Sub directory under the scatter gather directory. + * @return temporary directory under this scatter gather directory. + */ + private def scatterGatherTempDir(subDir: String) = IOUtils.subDir(this.scatterGatherDirectory, this.jobName + "-" + subDir) +} + +/** + * A function that can be run faster by splitting it up into pieces and then joining together the results. + */ +object ScatterGatherableFunction { + /** Used to deep clone a ScatterGatherableFunction. 
*/ + private lazy val cloner = new Cloner } diff --git a/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala index 070c36115..9a5681e4d 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/scattergather/SimpleTextGatherFunction.scala @@ -1,10 +1,16 @@ package org.broadinstitute.sting.queue.function.scattergather -import java.io.File +import org.broadinstitute.sting.commandline.Argument +/** + * Merges a text file. + * The script can be changed by setting rmdirScript. + * By default uses mergeText.sh in Sting/shell. + * The format of the call is [.. ] + */ class SimpleTextGatherFunction extends GatherFunction { - type GatherType = File + @Argument(doc="merge text script") + var mergeTextScript = "mergeText.sh" - // TODO: Write a text merging utility that takes into account headers. - def commandLine = "mergeText.sh %s%s".format(originalOutput, repeat(" ", gatherParts)) + def commandLine = "%s %s%s".format(mergeTextScript, originalOutput, repeat(" ", gatherParts)) } diff --git a/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala index f3a0f43b5..36bc97a7c 100755 --- a/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/ClasspathUtils.scala @@ -4,14 +4,32 @@ import collection.JavaConversions._ import org.reflections.util.ManifestAwareClasspathHelper import java.io.File import javax.print.URIException +import java.net.{URL, URLClassLoader} /** * Builds the correct class path by examining the manifests */ object ClasspathUtils { + + /** + * Returns a list of files that build up the classpath, taking into account jar file manifests. 
+ * @return List[File] that build up the current classpath. + */ def manifestAwareClassPath = { var urls = ManifestAwareClasspathHelper.getUrlsForManifestCurrentClasspath - var files = urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)}) - files.mkString(File.pathSeparator) + urls.map(url => try {new File(url.toURI)} catch {case urie: URIException => new File(url.getPath)}) + } + + /** + * Adds the directory to the system class loader classpath using reflection. + * HACK: Uses reflection to modify the class path, and assumes loader is a URLClassLoader + * @param path Directory to add to the system class loader classpath. + */ + def addClasspath(path: File): Unit = { + val url = path.toURI.toURL + val method = classOf[URLClassLoader].getDeclaredMethod("addURL", classOf[URL]); + if (!method.isAccessible) + method.setAccessible(true); + method.invoke(ClassLoader.getSystemClassLoader(), url); } } diff --git a/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala index b384c8dfa..6871d8f4b 100644 --- a/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/CollectionUtils.scala @@ -1,18 +1,16 @@ package org.broadinstitute.sting.queue.util /** - * Utilities that try to deeply apply operations to collections + * Utilities that try to deeply apply operations to collections, specifically Traversable and Option. */ object CollectionUtils { - def test(value: Any, f: Any => Boolean): Boolean = { - var result = f(value) - foreach(value, (item, collection) => { - result |= f(item) - }) - result - } - + /** + * Loops though a collection running the function f on each value. + * @param value The value to run f on, or a collection of values for which f should be run on. + * @param f The function to run on value, or to run on the values within the collection. + * @return The updated value. 
+ */ def updated(value: Any, f: Any => Any): Any = { value match { case traversable: Traversable[_] => traversable.map(updated(_, f)) @@ -21,6 +19,11 @@ object CollectionUtils { } } + /** + * Utility for recursively processing collections. + * @param value Initial the collection to be processed + * @param f a function that will be called for each (item, collection) in the initial collection + */ def foreach(value: Any, f: (Any, Any) => Unit): Unit = { value match { case traversable: Traversable[_] => @@ -37,11 +40,24 @@ object CollectionUtils { } } - // Because scala allows but throws NPE when trying to hash a collection with a null in it. - // http://thread.gmane.org/gmane.comp.lang.scala.internals/3267 - // https://lampsvn.epfl.ch/trac/scala/ticket/2935 - def removeNullOrEmpty[T](value: T): T = filterNotNullOrNotEmpty(value) + /** + * Utility for recursively processing collections. + * @param value Initial the collection to be processed + * @param f a function that will be called for each (item, collection) in the initial collection + */ + def foreach(value: Any, f: (Any) => Unit): Unit = { + value match { + case traversable: Traversable[_] => traversable.foreach(f(_)) + case option: Option[_] => option.foreach(f(_)) + case item => f(item) + } + } + /** + * Removes empty values from collections. + * @param value The collection to test. + * @return The value if it is not a collection, otherwise the collection with nulls and empties removed. + */ private def filterNotNullOrNotEmpty[T](value: T): T = { val newValue = value match { case traversable: Traversable[_] => traversable.map(filterNotNullOrNotEmpty(_)).filter(isNotNullOrNotEmpty(_)).asInstanceOf[T] @@ -51,7 +67,20 @@ object CollectionUtils { newValue } - private def isNotNullOrNotEmpty(value: Any): Boolean = { + + /** + * Returns true if the value is null or an empty collection. + * @param value Value to test for null, or a collection to test if it is empty. 
+ * @return true if the value is null, or false if the collection is empty, otherwise true. + */ + def isNullOrEmpty(value: Any): Boolean = !isNotNullOrNotEmpty(value) + + /** + * Returns false if the value is null or an empty collection. + * @param value Value to test for null, or a collection to test if it is empty. + * @return false if the value is null, or false if the collection is empty, otherwise true. + */ + def isNotNullOrNotEmpty(value: Any): Boolean = { val result = value match { case traversable: Traversable[_] => !filterNotNullOrNotEmpty(traversable).isEmpty case option: Option[_] => !filterNotNullOrNotEmpty(option).isEmpty diff --git a/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala b/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala new file mode 100644 index 000000000..0b322301e --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala @@ -0,0 +1,51 @@ +package org.broadinstitute.sting.queue.util + +import java.io.File + +/** + * Base class for a command line job. + */ +abstract class CommandLineJob { + var command: String = _ + var workingDir: File = _ + var inputFile: File = _ + var outputFile: File = _ + var errorFile: File = _ + + /** + * Runs the command, either immediately or dispatching it to a compute farm. + * If it is dispatched to a compute farm it should not start until jobs it depends on are finished. + */ + def run() + + /** + * Returns the content of a command output. + * @param streamOutput The output of the command. + * @return The content of the command, along with a message if it was truncated. + */ + protected def content(streamOutput: ProcessController.StreamOutput) = { + var content = streamOutput.content + if (streamOutput.contentTruncated) + content += "%n%n".format() + content + } + + /** + * Returns the ProcessController for this thread. + * @return The ProcessController for this thread. 
+ */ + protected def processController = CommandLineJob.threadProcessController.get + + /** A five mb limit of characters for display. */ + protected val FIVE_MB = 1024 * 512 * 5; +} + +/** + * Base class for a command line job. + */ +object CommandLineJob { + /** Thread local process controller container. */ + private val threadProcessController = new ThreadLocal[ProcessController] { + override def initialValue = new ProcessController + } +} diff --git a/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala index 5fa902391..4a9fa8f4a 100644 --- a/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala @@ -2,30 +2,69 @@ package org.broadinstitute.sting.queue.util import java.io.{IOException, File} +/** + * A collection of utilities for modifying java.io. + */ object IOUtils { + /** The current directory "." */ val CURRENT_DIR = new File(".") - def sub(parent: File, subPath: String) = { - val file = new File(subPath) + + /** + * Returns the sub path rooted at the parent. + * If the sub path is already absolute, returns the sub path. + * If the parent is the current directory, returns the sub path. + * If the sub bath is the current directory, returns the parent. + * Else returns new File(parent, subPath) + * @param parent The parent directory + * @param path The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + def subDir(dir: File, path: String): File = + subDir(dir.getAbsoluteFile, new File(path)) + + /** + * Returns the sub path rooted at the parent. + * If the sub path is already absolute, returns the sub path. + * If the parent is the current directory, returns the sub path. + * If the sub bath is the current directory, returns the parent. 
+ * Else returns new File(parent, subPath) + * @param parent The parent directory + * @param file The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + def subDir(parent: File, file: File): File = { if (parent == CURRENT_DIR && file == CURRENT_DIR) - CURRENT_DIR.getCanonicalFile + CURRENT_DIR.getCanonicalFile.getAbsoluteFile else if (parent == CURRENT_DIR || file.isAbsolute) - file + file.getAbsoluteFile else if (file == CURRENT_DIR) - parent + parent.getAbsoluteFile else - new File(parent, subPath) + new File(parent, file.getPath).getAbsoluteFile } - def temp(prefix: String, suffix: String = "") = { - val tempDir = File.createTempFile(prefix + "-", suffix) - if(!tempDir.delete) - throw new IOException("Could not delete sub file: " + tempDir.getAbsolutePath()) - if(!tempDir.mkdir) - throw new IOException("Could not create sub directory: " + tempDir.getAbsolutePath()) - tempDir - } + /** + * Resets the parent of the file to the directory. + * @param dir New parent directory. + * @param file Path to the file to be re-rooted. + * @return Absolute path to the new file. + */ + def resetParent(dir: File, file: File) = subDir(dir.getAbsoluteFile, file.getName).getAbsoluteFile - def reset(dir: File, file: File) = sub(dir, file.getName).getAbsoluteFile - def absolute(dir: File, file: File) = sub(dir, file.getPath).getAbsoluteFile + /** + * Creates a scatterGatherTempDir directory with the prefix and optional suffix. + * @param prefix Prefix for the directory name. + * @param suffix Optional suffix for the directory name. Defaults to "". + * @return The created temporary directory. + * @throws IOException if the directory could not be created. 
+ */ + def tempDir(prefix: String, suffix: String = "") = { + val temp = File.createTempFile(prefix + "-", suffix) + if(!temp.delete) + throw new IOException("Could not delete sub file: " + temp.getAbsolutePath()) + if(!temp.mkdir) + throw new IOException("Could not create sub directory: " + temp.getAbsolutePath()) + temp + } } diff --git a/scala/src/org/broadinstitute/sting/queue/util/Logging.scala b/scala/src/org/broadinstitute/sting/queue/util/Logging.scala index c61a6267f..5a9fed204 100755 --- a/scala/src/org/broadinstitute/sting/queue/util/Logging.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/Logging.scala @@ -7,25 +7,5 @@ import org.apache.log4j._ */ trait Logging { private val className = this.getClass.getName - protected lazy val logger = { - Logging.configureLogging - Logger.getLogger(className) - } -} - -object Logging { - private var configured = false - private var level = Level.INFO - def configureLogging = { - if (!configured) { - var root = Logger.getRootLogger - root.addAppender(new ConsoleAppender(new PatternLayout("%-5p %d{HH:mm:ss,SSS} - %m %n"))) - root.setLevel(level) - configured = true - } - } - - def setDebug = setLevel(Level.DEBUG) - def setTrace = setLevel(Level.TRACE) - private def setLevel(level: Level) = {this.level = level; Logger.getRootLogger.setLevel(level)} + protected lazy val logger = Logger.getLogger(className) } diff --git a/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala b/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala new file mode 100644 index 000000000..f18ad4304 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/util/LsfJob.scala @@ -0,0 +1,142 @@ +package org.broadinstitute.sting.queue.util + +import java.util.regex.Pattern +import collection.JavaConversions._ +import org.broadinstitute.sting.queue.QException + +/** + * An job submitted to LSF. This class is designed to work somewhat like + * java.lang.Process, but has some extensions. 
+ * + * @author A subset of the original BroadCore ported to scala by Khalid Shakir + */ +class LsfJob extends CommandLineJob with Logging { + var name: String = _ + var project: String = _ + var queue: String = _ + var preExecCommand: String = _ + var postExecCommand: String = _ + var waitForCompletion = false + var extraBsubArgs: List[String] = Nil + var bsubJobId: String = _ + + /** + * Starts the job. Command must exist. The job will be submitted to LSF. + */ + def run() = { + assert(bsubJobId == null, "LSF job was already started") + assert(command != null, "Command was not set on LSF job") + assert(outputFile != null, "Output file must be set on LSF job") + + // capture the output for debugging + val stdinSettings = new ProcessController.InputStreamSettings(null, null) + val stdoutSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false) + val stderrSettings = new ProcessController.OutputStreamSettings(FIVE_MB, null, false) + + // This is really nice for debugging, but spits out way too much stuff otherwise! + // log.info("About to execute LSF command: " + StringUtils.join(argArray, " ")); + + // Get environment vars and strip out LD_ASSUME_KERNEL + // This is necessary since GAP servers on linux 2.4.x kernel and can be removed when + // its no longer true. Only 'classic' LSF queue has 2.4 kernel-based machines. + + // launch the bsub job from the current directory + val processSettings = new ProcessController.ProcessSettings( + bsubCommand, environmentVariables, null, stdinSettings, stdoutSettings, stderrSettings, false) + val bsubOutput = processController.exec(processSettings) + + if (bsubOutput.exitValue != 0) { + logger.error("Failed to submit LSF job, got exit code %s. 
Standard error contained: %n%s" + .format(bsubOutput.exitValue, content(bsubOutput.stderr))) + throw new QException("Failed to submit LSF job, got exit code %s.".format(bsubOutput.exitValue)) + } + + // get the LSF job ID + val matcher = LsfJob.JOB_ID.matcher(bsubOutput.stdout.content) + matcher.find() + bsubJobId = matcher.group + + // set job name to LSF_ if not set already + if (name == null) + name = "lsf_job_" + bsubJobId + } + + /** + * Generates the bsub command line for this LsfJob. + * @return command line as a Array[String] + */ + def bsubCommand = { + var args = List.empty[String] + args :+= "bsub" + + if (name != null) { + args :+= "-J" + args :+= name + } + + if (inputFile != null) { + args :+= "-i" + args :+= inputFile.getAbsolutePath + } + + args :+= "-o" + args :+= outputFile.getAbsolutePath + + if (errorFile != null) { + args :+= "-e" + args :+= errorFile.getAbsolutePath + } + + if (queue != null) { + args :+= "-q" + args :+= queue + } + + if (project != null) { + args :+= "-P" + args :+= project + } + + if (preExecCommand != null) { + args :+= "-E" + args :+= preExecCommand + } + + if (postExecCommand != null) { + args :+= "-Ep" + args :+= postExecCommand + } + + if (workingDir != null) { + args :+= "-cwd" + args :+= workingDir.getPath + } + + if (waitForCompletion) { + args :+= "-K" + } + + args ++= extraBsubArgs + + args :+= command + + args.toArray + } + + /** + * Get the list of environment variables and pass into the exec job. We strip + * out LD_ASSUME_KERNEL because it behaves badly when running bsub jobs across + * different versions of the linux OS. + * + * @return array of environment vars in 'name=value' format. + */ + private def environmentVariables = + System.getenv() + .filterNot{case (name, value) => name.equalsIgnoreCase("LD_ASSUME_KERNEL") || value == null} + .toMap +} + +object LsfJob { + /** Used to search the stdout for the job id. 
*/
+  private val JOB_ID = Pattern.compile("\\d+")
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala
new file mode 100644
index 000000000..80162582e
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala
@@ -0,0 +1,360 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io._
+import scala.collection.mutable.{HashSet, ListMap}
+
+/**
+ * Facade to Runtime.exec() and java.lang.Process. Handles
+ * running a process to completion and returns stdout and stderr
+ * as strings. Creates separate threads for reading stdout and stderr,
+ * then reuses those threads for each process, so the most efficient use is
+ * to create one of these and use it repeatedly. Instances are not
+ * thread-safe, however.
+ *
+ * @author originally by Michael Koehrsen ported to scala and enhanced by Khalid Shakir
+ */
+class ProcessController extends Logging {
+
+  // Threads that capture stdout and stderr
+  private val stdoutCapture = new OutputCapture(ProcessController.STDOUT_KEY)
+  private val stderrCapture = new OutputCapture(ProcessController.STDERR_KEY)
+
+  // Communication channels with output capture threads
+  /** Holds the stdout and stderr sent to the background capture threads */
+  private val toCapture = new ListMap[String, ProcessController.CapturedStreamOutput]
+
+  /** Holds the results of the capture from the background capture threads.
+   * May be the content via toCapture or an EmptyStreamOutput if the capture was interrupted. */
+  private val fromCapture = new ListMap[String, ProcessController.StreamOutput]
+
+  // Start the background threads for this controller.
+  stdoutCapture.start()
+  stderrCapture.start()
+
+  /**
+   * Executes a command line program with the settings and waits for it to return, processing the output on a background thread.
+   * The captured stdout and stderr are always collected from the capture threads before this method
+   * returns or throws, so that a failed call cannot leave results behind for the next call.
+   * @param settings Settings to be run.
+   * @return The output of the command.
+   */
+  def exec(settings: ProcessController.ProcessSettings): ProcessController.ProcessOutput = {
+    val builder = new ProcessBuilder(settings.cmdarray:_*)
+    builder.directory(settings.directory)
+
+    if (settings.environment != null) {
+      val builderEnvironment = builder.environment
+      builderEnvironment.clear()
+      settings.environment.foreach{case (name, value) => builderEnvironment.put(name, value)}
+    }
+
+    builder.redirectErrorStream(settings.redirectErrorStream)
+
+    var stdout: ProcessController.StreamOutput = null
+    var stderr: ProcessController.StreamOutput = null
+    val process = builder.start
+
+    ProcessController.running.add(process)
+    try {
+      val stdoutSettings = if (settings.stdoutSettings == null) ProcessController.EmptyStreamSettings else settings.stdoutSettings
+      val stderrSettings = if (settings.stderrSettings == null) ProcessController.EmptyStreamSettings else settings.stderrSettings
+
+      // Hand both streams to the capture threads and wake them up.
+      toCapture.synchronized {
+        toCapture.put(ProcessController.STDOUT_KEY, new ProcessController.CapturedStreamOutput(process.getInputStream, stdoutSettings))
+        toCapture.put(ProcessController.STDERR_KEY, new ProcessController.CapturedStreamOutput(process.getErrorStream, stderrSettings))
+        toCapture.notifyAll()
+      }
+
+      // Set when this thread is interrupted while waiting for the capture threads.
+      var interrupted = false
+      try {
+        // Writing stdin happens inside the try so the capture results are collected even if the write fails.
+        writeStdin(process, settings.stdinSettings)
+        process.waitFor()
+      } finally {
+        while (stdout == null || stderr == null) {
+          fromCapture.synchronized {
+            fromCapture.remove(ProcessController.STDOUT_KEY) match {
+              case Some(stream) => stdout = stream
+              case None => /* ignore */
+            }
+            fromCapture.remove(ProcessController.STDERR_KEY) match {
+              case Some(stream) => stderr = stream
+              case None => /* ignore */
+            }
+
+            try {
+              if (stdout == null || stderr == null)
+                fromCapture.wait()
+            } catch {
+              case e: InterruptedException =>
+                // Keep waiting for both results, but remember the interrupt for the caller.
+                logger.error(e)
+                interrupted = true
+            }
+          }
+        }
+        // Restore the interrupt status that was cleared when wait() threw.
+        if (interrupted)
+          Thread.currentThread.interrupt()
+      }
+    } finally {
+      ProcessController.running.remove(process)
+    }
+
+    new ProcessController.ProcessOutput(process.exitValue, stdout, stderr)
+  }
+
+  /**
+   * Writes the requested input string and input file to the process stdin and then closes stdin.
+   * @param process Process to write to.
+   * @param stdinSettings Settings for stdin, or null to write nothing.
+   */
+  private def writeStdin(process: Process, stdinSettings: ProcessController.InputStreamSettings) = {
+    val writer = new OutputStreamWriter(process.getOutputStream)
+    try {
+      if (stdinSettings != null) {
+        if (stdinSettings.input != null)
+          writer.write(stdinSettings.input)
+        if (stdinSettings.inputFile != null) {
+          val reader = new FileReader(stdinSettings.inputFile)
+          try {
+            val buf = new Array[Char](4096)
+            var readCount = reader.read(buf)
+            while (readCount >= 0) {
+              writer.write(buf, 0, readCount)
+              readCount = reader.read(buf)
+            }
+          } finally {
+            reader.close()
+          }
+        }
+      }
+    } finally {
+      // Closing the writer flushes it and closes the process stdin.
+      writer.close()
+    }
+  }
+
+  /** Ensures that the threads used to manipulate the IO for the process are cleaned up properly. */
+  def close() = {
+    try {
+      stdoutCapture.interrupt()
+      stderrCapture.interrupt()
+    } catch {
+      case e =>
+        logger.error(e)
+    }
+  }
+
+  /** calls close() */
+  override def finalize = close()
+
+  /**
+   * Reads in the output of a stream on a background thread to keep the output pipe from backing up and freezing the called process.
+   * @param key The stdout or stderr key for this output capture.
+   */
+  private class OutputCapture(private val key: String)
+          extends Thread("OutputCapture-" + key + "-" + Thread.currentThread.getName) {
+
+    setDaemon(true)
+
+    /** Runs the capture until the thread is interrupted. */
+    override def run = {
+      var break = false
+      while (!break) {
+        // Reported back to the controller if no stream was captured in this pass.
+        var processStream: ProcessController.StreamOutput = ProcessController.EmptyStreamOutput
+        try {
+          // Wait for a new input stream to be passed from this process controller.
+          var capturedProcessStream: ProcessController.CapturedStreamOutput = null
+          while (capturedProcessStream == null) {
+            toCapture.synchronized {
+              toCapture.remove(key) match {
+                case Some(stream) => capturedProcessStream = stream
+                case None => toCapture.wait()
+              }
+            }
+          }
+          // Read in the input stream
+          processStream = capturedProcessStream
+          capturedProcessStream.read
+        } catch {
+          case e: InterruptedException => {
+            logger.info("OutputReader interrupted, exiting")
+            break = true
+          }
+          case e: IOException => {
+            logger.error("Error reading process output", e)
+          }
+        } finally {
+          // Send the string back to the process controller.
+          fromCapture.synchronized {
+            fromCapture.put(key, processStream)
+            fromCapture.notify()
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Facade to Runtime.exec() and java.lang.Process. Handles
+ * running a process to completion and returns stdout and stderr
+ * as strings. Creates separate threads for reading stdout and stderr,
+ * then reuses those threads for each process, so the most efficient use is
+ * to create one of these and use it repeatedly. Instances are not
+ * thread-safe, however.
+ *
+ * @author originally by Michael Koehrsen ported to scala and enhanced by Khalid Shakir
+ */
+object ProcessController extends Logging {
+
+  /**
+   * Settings that define how to run a process.
+   * @param cmdarray Command line to run.
+   * @param environment Environment settings to override System.getEnv, or null to use System.getEnv.
+   * @param directory The directory to run the command in, or null to run in the current directory.
+   * @param stdinSettings Settings for writing to the process stdin.
+   * @param stdoutSettings Settings for capturing the process stdout.
+   * @param stderrSettings Setting for capturing the process stderr.
+   * @param redirectErrorStream true if stderr should be sent to stdout.
+   */
+  class ProcessSettings(val cmdarray: Array[String], val environment: Map[String, String], val directory: File,
+                        val stdinSettings: InputStreamSettings, val stdoutSettings: OutputStreamSettings,
+                        val stderrSettings: OutputStreamSettings, val redirectErrorStream: Boolean)
+
+  /**
+   * Settings that define text to write to the process stdin.
+   * @param input String to write to stdin.
+   * @param inputFile File to write to stdin.
+   */
+  class InputStreamSettings(val input: String, val inputFile: File)
+
+  /**
+   * Settings that define text to capture from a process stream.
+   * @param stringSize The number of characters to capture, or -1 for unlimited.
+   * @param outputFile The file to write output to, or null to skip output.
+   * @param outputFileAppend true if the output file should be appended to.
+   */
+  class OutputStreamSettings(val stringSize: Int, val outputFile: File, val outputFileAppend: Boolean)
+
+  /**
+   * The output of a process.
+   * @param exitValue The exit value.
+   * @param stdout The capture of stdout as defined by the stdout OutputStreamSettings.
+   * @param stderr The capture of stderr as defined by the stderr OutputStreamSettings.
+   */
+  class ProcessOutput(val exitValue: Int, val stdout: StreamOutput, val stderr: StreamOutput)
+
+  /**
+   * The base class of stream output.
+   */
+  abstract class StreamOutput {
+    /**
+     * Returns the content as a string.
+     * @return The content as a string.
+     */
+    def content: String
+
+    /**
+     * Returns true if the content was truncated.
+     * @return true if the content was truncated.
+     */
+    def contentTruncated: Boolean
+  }
+
+  private var currentCaptureId = 0
+  /**
+   * Returns the next output capture id.
+   * @return The next output capture id.
+   */
+  private def NEXT_OUTPUT_CAPTURE_ID = {
+    currentCaptureId += 1
+    currentCaptureId
+  }
+  private val STDOUT_KEY = "stdout"
+  private val STDERR_KEY = "stderr"
+
+  /** Tracks running processes so that they can be killed as the JVM shuts down. */
+  private val running = new HashSet[Process]()
+  Runtime.getRuntime.addShutdownHook(new Thread {
+    /** Kills running processes as the JVM shuts down. */
+    override def run = for (process <- running.clone) {
+      logger.warn("Killing: " + process)
+      process.destroy
+    }
+  })
+
+  /** Empty stream settings used when no output is requested. */
+  private object EmptyStreamSettings extends OutputStreamSettings(0, null, false)
+
+  /** Empty stream output when no output is captured due to an error. */
+  private object EmptyStreamOutput extends StreamOutput {
+    def content = ""
+    def contentTruncated = false
+  }
+
+  /**
+   * Stream output captured from a stream.
+   * @param stream Stream to capture output.
+   * @param settings Settings that define what to capture.
+   */
+  private class CapturedStreamOutput(val stream: InputStream, val settings: OutputStreamSettings) extends StreamOutput {
+    /**
+     * Returns the captured content as a string.
+     * @return The captured content as a string.
+     */
+    def content = stringWriter.toString()
+
+    /**
+     * Returns true if the captured content was truncated.
+     * @return true if the captured content was truncated.
+     */
+    def contentTruncated = stringTruncated
+
+    /**
+     * Drain the input stream to keep the process from backing up until it's empty.
+     * The output file and the stream are closed even when reading or writing fails.
+     */
+    def read() = {
+      try {
+        val reader = new InputStreamReader(stream)
+        val buf = new Array[Char](4096)
+        var readCount = reader.read(buf)
+        while (readCount >= 0) {
+          writeString(buf, readCount)
+          writeFile(buf, readCount)
+          readCount = reader.read(buf)
+        }
+      } finally {
+        // Nested so that a failure closing the file cannot skip closing the stream.
+        try {
+          closeFile()
+        } finally {
+          stream.close()
+        }
+      }
+    }
+
+    /** The string to write capture content. */
+    private lazy val stringWriter = if (settings.stringSize < 0) new StringWriter else new StringWriter(settings.stringSize)
+
+    /** True if the content is truncated. */
+    private var stringTruncated = false
+
+    /** The number of characters left until the buffer is full. */
+    private var stringRemaining = settings.stringSize
+
+    /**
+     * Writes the buffer to the stringWriter up to stringRemaining characters.
+     * Once the limit has been exceeded nothing more is written and the content is marked as truncated.
+     * @param chars Character buffer to write.
+     * @param len Number of characters in the buffer.
+     */
+    private def writeString(chars: Array[Char], len: Int) = {
+      if (settings.stringSize < 0) {
+        stringWriter.write(chars, 0, len)
+      } else {
+        if (!stringTruncated) {
+          stringWriter.write(chars, 0, if (len > stringRemaining) stringRemaining else len)
+          stringRemaining -= len
+          if (stringRemaining < 0)
+            stringTruncated = true
+        }
+      }
+    }
+
+    /** The file writer to capture content or null if no output file was requested. */
+    private lazy val fileWriter = {
+      if (settings.outputFile == null) {
+        null
+      } else {
+        new FileWriter(settings.outputFile, settings.outputFileAppend)
+      }
+    }
+
+    /**
+     * Writes the buffer to the fileWriter if it is not null.
+     * @param chars Character buffer to write.
+     * @param len Number of characters in the buffer.
+     */
+    private def writeFile(chars: Array[Char], len: Int) = {
+      if (fileWriter != null)
+        fileWriter.write(chars, 0, len)
+    }
+
+    /** Closes the fileWriter if it is not null. */
+    private def closeFile() = {
+      if (fileWriter != null) {
+        fileWriter.flush
+        fileWriter.close
+      }
+    }
+  }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala
deleted file mode 100755
index f79a4f33d..000000000
--- a/scala/src/org/broadinstitute/sting/queue/util/ProcessUtils.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package org.broadinstitute.sting.queue.util
-
-import org.broadinstitute.sting.utils.text.XReadLines
-import collection.mutable.ListBuffer
-import collection.JavaConversions._
-import java.io.File
-
-object ProcessUtils extends Logging {
-
-  Runtime.getRuntime.addShutdownHook(new Thread {
-    override def run = for (process <- running.clone) {
-      logger.warn("Killing: " + process)
-      process.destroy
-    }
-  })
-
-  val running = new ListBuffer[Process]()
-
-  def runCommandAndWait(command: String, directory: File) = {
-    logger.debug("Running command: " + command)
-
-    var builder = new ProcessBuilder("sh", "-c", command).directory(directory)
-
-    var process = builder.start
-    running += process
-    var result = process.waitFor
-    running -= process
-
-    if (logger.isDebugEnabled) {
-      for (line <- new XReadLines(process.getInputStream).iterator) {
-        logger.debug("command: " + line)
-      }
-
-      for (line <- new XReadLines(process.getErrorStream).iterator) {
-        logger.error("command: " + line)
-      }
-    }
-
-    logger.debug("Command exited with result: " + result)
-
-    result
-  }
-}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
index 566e3cc02..6f6ffdcc7 100644
--- a/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
+++ b/scala/src/org/broadinstitute/sting/queue/util/ReflectionUtils.scala
@@ -2,67 +2,90 @@ package org.broadinstitute.sting.queue.util
 
 import org.broadinstitute.sting.queue.QException
 import java.lang.annotation.Annotation
-import
scala.concurrent.JavaConversions._ import java.lang.reflect.{ParameterizedType, Field} import org.broadinstitute.sting.commandline.ClassType +import org.broadinstitute.sting.utils.classloader.JVMUtils +/** + * A collection of scala extensions to the Sting JVMUtils. + */ object ReflectionUtils { + + /** + * Returns true if field has the annotation. + * @param field Field to check. + * @param annotation Class of the annotation to look for. + * @return true if field has the annotation. + */ def hasAnnotation(field: Field, annotation: Class[_ <: Annotation]) = field.getAnnotation(annotation) != null + /** + * Gets the annotation or throws an exception if the annotation is not found. + * @param field Field to check. + * @param annotation Class of the annotation to look for. + * @return The annotation. + */ def getAnnotation[T <: Annotation](field: Field, annotation: Class[T]): T = { if (!hasAnnotation(field, annotation)) throw new QException("Field %s is missing annotation %s".format(field, annotation)) field.getAnnotation(annotation).asInstanceOf[T] } - + + /** + * Returns all the declared fields on a class in order of sub type to super type. + * @param clazz Base class to start looking for fields. + * @return List[Field] found on the class and all super classes. + */ def getAllFields(clazz: Class[_]) = getAllTypes(clazz).map(_.getDeclaredFields).flatMap(_.toList) - def filterFields(fields: List[Field], annotation: Class[_ <: Annotation]) = fields.filter(field => hasAnnotation(field, annotation)) - - def getFieldValues(obj: AnyRef, fields: List[Field]) = fields.map(field => fieldGetter(field).invoke(obj)) - + /** + * Gets all the types on a class in order of sub type to super type. + * @param clazz Base class. + * @return List[Class] including the class and all super classes. 
+ */ def getAllTypes(clazz: Class[_]) = { var types = List.empty[Class[_]] - var c = clazz - while (c != null) { - types :+= c - c = c.getSuperclass - } + var c = clazz + while (c != null) { + types :+= c + c = c.getSuperclass + } types } - def getValue(obj: AnyRef, field: Field) = fieldGetter(field).invoke(obj) - def setValue(obj: AnyRef, field: Field, value: Any) = fieldSetter(field).invoke(obj, value.asInstanceOf[AnyRef]) - - def addOrUpdateWithStringValue(obj: AnyRef, field: Field, value: String) = { - val getter = fieldGetter(field) - val setter = fieldSetter(field) - - if (classOf[Seq[_]].isAssignableFrom(field.getType)) { - - val fieldType = getCollectionType(field) - val typeValue = coerce(fieldType, value) - - var list = getter.invoke(obj).asInstanceOf[Seq[_]] - list :+= typeValue - setter.invoke(obj, list) - - } else if (classOf[Option[_]].isAssignableFrom(field.getType)) { - - val fieldType = getCollectionType(field) - val typeValue = coerce(fieldType, value) - - setter.invoke(obj, Some(typeValue)) - - } else { - - val fieldType = field.getType - val typeValue = coerce(fieldType, value) - - setter.invoke(obj, typeValue.asInstanceOf[AnyRef]) + /** + * Gets a field value using reflection. + * Attempts to use the scala getter then falls back to directly accessing the field. + * @param obj Object to inspect. + * @param field Field to retrieve. + * @return The field value. + */ + def getValue(obj: AnyRef, field: Field): AnyRef = + try { + field.getDeclaringClass.getMethod(field.getName).invoke(obj) + } catch { + case e: NoSuchMethodException => JVMUtils.getFieldValue(field, obj) } - } + /** + * Sets a field value using reflection. + * Attempts to use the scala setter then falls back to directly accessing the field. + * @param obj Object to inspect. + * @param field Field to set. + * @param value The new field value. 
+ */ + def setValue(obj: AnyRef, field: Field, value: Any) = + try { + field.getDeclaringClass.getMethod(field.getName+"_$eq", field.getType).invoke(obj, value.asInstanceOf[AnyRef]) + } catch { + case e: NoSuchMethodException => JVMUtils.setFieldValue(field, obj, value) + } + + /** + * Returns the collection type of a field or throws an exception if the field contains more than one parameterized type, or the collection type cannot be found. + * @param field Field to retrieve the collection type. + * @return The collection type for the field. + */ def getCollectionType(field: Field) = { getGenericTypes(field) match { case Some(classes) => @@ -70,10 +93,15 @@ object ReflectionUtils { throw new IllegalArgumentException("Field contains more than one generic type: " + field) classes(0) case None => - throw new QException("Generic type not set for collection: " + field) + throw new QException("Generic type not set for collection. Did it declare an @ClassType?: " + field) } } + /** + * Returns the generic types for a field or None. + * @param field Field to retrieve the collection type. + * @return The array of classes that are in the collection type, or None if the type cannot be found. + */ private def getGenericTypes(field: Field): Option[Array[Class[_]]] = { // TODO: Refactor: based on java code in org.broadinstitute.sting.commandline.ArgumentTypeDescriptor // If this is a parameterized collection, find the contained type. If blow up if only one type exists. @@ -85,39 +113,4 @@ object ReflectionUtils { } else None } - - private def fieldGetter(field: Field) = - try { - field.getDeclaringClass.getMethod(field.getName) - } catch { - case e: NoSuchMethodException => throw new QException("Field may be private? 
Unable to find getter for field: " + field) - } - - private def fieldSetter(field: Field) = - try { - field.getDeclaringClass.getMethod(field.getName+"_$eq", field.getType) - } catch { - case e: NoSuchMethodException => throw new QException("Field may be a val instead of var? Unable to find setter for field: " + field) - } - - private def coerce(clazz: Class[_], value: String) = { - if (classOf[String] == clazz) value - else if (classOf[Boolean] == clazz) value.toBoolean - else if (classOf[Byte] == clazz) value.toByte - else if (classOf[Short] == clazz) value.toShort - else if (classOf[Int] == clazz) value.toInt - else if (classOf[Long] == clazz) value.toLong - else if (classOf[Float] == clazz) value.toFloat - else if (classOf[Double] == clazz) value.toDouble - else if (hasStringConstructor(clazz)) - clazz.getConstructor(classOf[String]).newInstance(value) - else throw new QException("Unable to coerce value '%s' to type '%s'.".format(value, clazz)) - } - - private def hasStringConstructor(clazz: Class[_]) = { - clazz.getConstructors.exists(constructor => { - val parameters = constructor.getParameterTypes - parameters.size == 1 && parameters.head == classOf[String] - }) - } } diff --git a/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala new file mode 100644 index 000000000..f2c84649c --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala @@ -0,0 +1,71 @@ +package org.broadinstitute.sting.queue.util + +import collection.JavaConversions._ +import org.broadinstitute.sting.queue.QException +import java.lang.Class +import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor} + +/** + * An ArgumentTypeDescriptor that can parse the scala collections. 
+ */
+class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
+
+  /**
+   * Checks if this descriptor can parse the class type.
+   * @param classType Class type to check.
+   * @return true if the class is a List, Set, or an Option.
+   */
+  def supports(classType: Class[_]) = isCompound(classType)
+
+  /**
+   * Checks if the field of the argument source is a scala collection.
+   * @param source Argument source to check.
+   * @return true if the type of the source field is a List, Set, or an Option.
+   */
+  override def isMultiValued(source: ArgumentSource) = isCompound(source.field.getType)
+
+  /**
+   * Checks if the class type is a scala collection.
+   * @param classType Class type to check.
+   * @return true if the class is a List, Set, or an Option.
+   */
+  private def isCompound(classType: Class[_]) = {
+    classOf[List[_]].isAssignableFrom(classType) ||
+    classOf[Set[_]].isAssignableFrom(classType) ||
+    classOf[Option[_]].isAssignableFrom(classType)
+  }
+
+  /**
+   * Parses the argument matches based on the class type of the argument source's field.
+   * Each value is parsed with the descriptor for the collection's component type.
+   * @param source Argument source that contains the field being populated.
+   * @param classType Class type being parsed.
+   * @param argumentMatches The argument match strings that were found for this argument source.
+   * @return The parsed object: a List, a Set, or an Option depending on classType.
+   */
+  def parse(source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = {
+    val componentType = ReflectionUtils.getCollectionType(source.field)
+    val componentArgumentParser = ArgumentTypeDescriptor.create(componentType)
+
+    if (classOf[List[_]].isAssignableFrom(classType)) {
+      // Collect into a buffer: appending to an immutable List copies the list on every append.
+      val list = new scala.collection.mutable.ListBuffer[Any]
+      for (argumentMatch <- argumentMatches)
+        for (value <- argumentMatch)
+          list += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
+      list.toList
+    } else if (classOf[Set[_]].isAssignableFrom(classType)) {
+      var set = Set.empty[Any]
+      for (argumentMatch <- argumentMatches)
+        for (value <- argumentMatch)
+          set += componentArgumentParser.parse(source, componentType, new ArgumentMatches(value))
+      set
+    } else if (classOf[Option[_]].isAssignableFrom(classType)) {
+      // An Option holds at most one value, so more than one match is an error and no match is None.
+      if (argumentMatches.size > 1)
+        throw new QException("Unable to set Option to multiple values: " + argumentMatches.mkString(" "))
+      else if (argumentMatches.size == 1)
+        Some(componentArgumentParser.parse(source, componentType, argumentMatches))
+      else
+        None
+    } else
+      throw new QException("Unsupported compound argument type: " + classType)
+  }
+}
diff --git a/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala
new file mode 100755
index 000000000..e4f8f2899
--- /dev/null
+++ b/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala
@@ -0,0 +1,37 @@
+package org.broadinstitute.sting.queue.util
+
+import org.broadinstitute.sting.queue.QException
+
+/**
+ * Runs a job on the command line by invoking "sh -c <command>"
+ */
+class ShellJob extends CommandLineJob with Logging {
+  /**
+   * Runs the command and waits for the output.
+   * Standard error goes to the error file when one is set and is merged into standard output otherwise.
+   * Throws a QException when the command exits with a non-zero exit code.
+   */
+  def run() = {
+    assert(command != null, "Command was not set on job")
+
+    // Without a dedicated error file, stderr is redirected into stdout.
+    val stderrFile = this.errorFile
+    val mergeStderr = stderrFile == null
+    // The stdout text is only kept in memory when it will be written to the debug log.
+    val stdoutCapacity = if (logger.isDebugEnabled) FIVE_MB else 0
+
+    val processSettings = new ProcessController.ProcessSettings(
+      Array("sh", "-c", command),
+      null,
+      this.workingDir,
+      new ProcessController.InputStreamSettings(null, this.inputFile),
+      new ProcessController.OutputStreamSettings(stdoutCapacity, this.outputFile, true),
+      new ProcessController.OutputStreamSettings(FIVE_MB, stderrFile, true),
+      mergeStderr)
+
+    val result = processController.exec(processSettings)
+
+    if (logger.isDebugEnabled) {
+      logger.debug("output: " + content(result.stdout))
+      logger.debug("error: " + content(result.stderr))
+      logger.debug("Command exited with result: " + result.exitValue)
+    }
+
+    if (result.exitValue != 0) {
+      logger.error("Failed to run job, got exit code %s. Standard error contained: %n%s"
+        .format(result.exitValue, content(result.stderr)))
+      throw new QException("Failed to run job, got exit code %s.".format(result.exitValue))
+    }
+  }
+}
diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index 9a2acdd28..e5f39d0f2 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -6,7 +6,9 @@ - + + + @@ -15,5 +17,8 @@ + + + diff --git a/settings/repository/edu.mit.broad/broad-core-all-2.8.jar b/settings/repository/edu.mit.broad/broad-core-all-2.8.jar deleted file mode 100644 index 715288886..000000000 Binary files a/settings/repository/edu.mit.broad/broad-core-all-2.8.jar and /dev/null differ diff --git a/settings/repository/edu.mit.broad/broad-core-all-2.8.xml b/settings/repository/edu.mit.broad/broad-core-all-2.8.xml deleted file mode 100644 index 7e7b31e80..000000000 --- a/settings/repository/edu.mit.broad/broad-core-all-2.8.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml b/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml index 65899298f..75fd688fb 100644 --- a/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml +++ b/settings/repository/org.reflections/reflections-0.9.5-svnversion79M_mod2.xml @@ -1,3 +1,12 @@ + + + + + + + + +