From 0105e8d063b7aedab0cd91b301cd5745c0b1f7f3 Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 25 Aug 2010 22:17:36 +0000 Subject: [PATCH] Updated Queue GATK generation to reflect -B and -I changes. To add support for "-I:tumor tumor.bam", the GATK argument input_file (-I) is now generated as a List of NamedFile objects. Could not get sugar working 100%. To activate sugar, import the gatk package. This effectively adds a new method to java.io.File called toNamedFile. When adding a file to the list, call countReads.input_file :+= myJavaFile.toNamedFile See scala/qscript/examples for actual examples. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4122 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/ArgumentDefinitionField.java | 55 ++++++++++++------- .../queue/extensions/gatk/ArgumentField.java | 14 ++++- .../queue/extensions/gatk/RodBindField.java | 4 +- .../qscript/examples/ExampleCountReads.scala | 2 +- .../examples/ExampleUnifiedGenotyper.scala | 10 ++-- .../queue/extensions/gatk/NamedFile.scala | 35 ++++++++++++ .../extensions/gatk/NamedFileWrapper.scala | 9 +++ .../sting/queue/extensions/gatk/RodBind.scala | 24 +++++++- .../sting/queue/extensions/gatk/package.scala | 13 +++++ .../queue/function/CommandLineFunction.scala | 33 ++++++----- 10 files changed, 153 insertions(+), 46 deletions(-) create mode 100644 scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFile.scala create mode 100644 scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFileWrapper.scala create mode 100644 scala/src/org/broadinstitute/sting/queue/extensions/gatk/package.scala diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index e4523519e..6d460185a 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ 
b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -46,6 +46,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected boolean isRequired() { return argumentDefinition.required; } @Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); } @Override protected String getValidation() { return escape(argumentDefinition.validation); } + protected boolean isFlag() { return argumentDefinition.isFlag; } + protected boolean isMultiValued() { return argumentDefinition.isMultiValued; } protected final String getShortFieldGetter() { return getFieldName(getRawShortFieldName()); } protected final String getShortFieldSetter() { return getFieldName(getRawShortFieldName() + "_="); } @@ -75,8 +77,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } protected static final String REQUIRED_TEMPLATE = " + \" %1$s \" + %2$s.format(%3$s)"; - protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=%2$s)"; - protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=%2$s)"; + protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=formatValue(%2$s))"; + protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=formatValue(%2$s))"; protected static final String FLAG_TEMPLATE = " + (if (%3$s) \" %1$s\" else \"\")"; public final String getCommandLineAddition() { @@ -99,7 +101,10 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } protected String getCommandLineTemplate() { - return isRequired() ? 
REQUIRED_TEMPLATE : OPTIONAL_TEMPLATE; + if (isFlag()) return FLAG_TEMPLATE; + if (isMultiValued()) return REPEAT_TEMPLATE; + if (isRequired()) return REQUIRED_TEMPLATE; + return OPTIONAL_TEMPLATE; } public static List getArgumentFields(Class classType) { @@ -122,11 +127,11 @@ public abstract class ArgumentDefinitionField extends ArgumentField { // ROD Bindings are set by the RodBindField } else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets - return Collections.singletonList(new RodBindArgumentField(argumentDefinition, argumentDefinition.required)); + return Collections.singletonList(new RodBindArgumentField(argumentDefinition)); //return Collections.emptyList(); } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { - return Arrays.asList(new InputArgumentField(argumentDefinition), new IndexFilesField()); + return Arrays.asList(new InputNamedFileDefinitionField(argumentDefinition), new IndexFilesField()); } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) { return Collections.singletonList(new InputArgumentField(argumentDefinition)); @@ -219,14 +224,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { } @Override protected Class getInnerType() { return File.class; } - @Override protected String getFieldType() { return String.format(isMultiValued() ? "List[%s]" : "%s", getRawFieldType()); } + @Override protected String getFieldType() { return isMultiValued() ? "List[File]" : "File"; } @Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; } - @Override protected String getCommandLineTemplate() { - return isMultiValued() ? 
REPEAT_TEMPLATE : super.getCommandLineTemplate(); - } - - protected String getRawFieldType() { return "File"; } - protected boolean isMultiValued() { return argumentDefinition.isMultiValued; } } // if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) @@ -313,15 +312,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField { /** * The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file. */ - public static class RodBindArgumentField extends InputArgumentField { - private boolean isRequired; - public RodBindArgumentField(ArgumentDefinition argumentDefinition, boolean isRequired) { + public static class RodBindArgumentField extends ArgumentDefinitionField { + public RodBindArgumentField(ArgumentDefinition argumentDefinition) { super(argumentDefinition); - this.isRequired = isRequired; } + @Override protected Class getInnerType() { return null; } // RodBind does not need to be imported. + @Override protected String getFieldType() { return "List[RodBind]"; } + @Override protected String getDefaultValue() { return "Nil"; } + @Override protected String getCommandLineTemplate() { + return " + repeat(\"\", %3$s, format=RodBind.formatCommandLine(\"%1$s\"))"; + } + } - @Override protected boolean isRequired() { return this.isRequired; } - @Override protected String getRawFieldType() { return "RodBind"; } + /** + * Named input_files. + */ + public static class InputNamedFileDefinitionField extends ArgumentDefinitionField { + public InputNamedFileDefinitionField(ArgumentDefinition argumentDefinition) { + super(argumentDefinition); + } + @Override protected Class getInnerType() { return null; } // NamedFile does not need to be imported. 
+ @Override protected String getFieldType() { return "List[NamedFile]"; } + @Override protected String getDefaultValue() { return "Nil"; } + @Override protected String getCommandLineTemplate() { + return " + repeat(\"\", %3$s, format=NamedFile.formatCommandLine(\"%1$s\"))"; + } } /** @@ -339,8 +354,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getRawFieldName() { return "index_files"; } @Override protected String getFreezeFields() { return String.format( - "index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" + - "index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n"); + "index_files ++= input_file.filter(bam => bam != null && bam.file.getName.endsWith(\".bam\")).map(bam => new File(bam.file.getPath + \".bai\"))%n" + + "index_files ++= input_file.filter(sam => sam != null && sam.file.getName.endsWith(\".sam\")).map(sam => new File(sam.file.getPath + \".sai\"))%n"); } } diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index a2ae317fc..44dded793 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -116,15 +116,23 @@ public abstract class ArgumentField { /** @return The scala default value. */ protected abstract String getDefaultValue(); - /** @return The class of the field, or the component type if the scala field is a collection. */ + /** @return The class of the field, or the component type if the scala + * field is a collection, or null if no type needs to be imported. + * NOTE: Used in some cases by getFieldType so the two functions should be overridden together. 
+ */ protected abstract Class getInnerType(); /** @return A custom command for overriding freeze. */ protected String getFreezeFields() { return ""; } - @SuppressWarnings("unchecked") + /** @return Classes that should be imported. */ protected Collection> getImportClasses() { - return Arrays.asList(this.getInnerType(), getAnnotationIOClass()); + ArrayList> importClasses = new ArrayList>(); + importClasses.add(this.getAnnotationIOClass()); + Class innerType = this.getInnerType(); + if (innerType != null) + importClasses.add(innerType); + return importClasses; } /** @return True if this field uses @Scatter. */ diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java index 7ae929b93..a2bfe3f88 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java @@ -65,8 +65,8 @@ public class RodBindField extends ArgumentField { @Override public String getCommandLineAddition() { return String.format(this.useOption() - ? " + optional(\" -B %s,%s,\", %s)" - : " + \" -B %s,%s,\" + %s", + ? 
" + optional(\" -B:%s,%s \", %s)" + : " + \" -B:%s,%s \" + %s", this.trackName, this.typeName, getFieldName()); } diff --git a/scala/qscript/examples/ExampleCountReads.scala b/scala/qscript/examples/ExampleCountReads.scala index c02362fa5..b000c9c91 100644 --- a/scala/qscript/examples/ExampleCountReads.scala +++ b/scala/qscript/examples/ExampleCountReads.scala @@ -55,7 +55,7 @@ class ExampleCountReads extends QScript { singleCountReads.jarFile = gatkJar singleCountReads.reference_sequence = referenceFile // ':+' is the scala List append operator - singleCountReads.input_file :+= bamFile + singleCountReads.input_file :+= bamFile.toNamedFile add(singleCountReads) } } diff --git a/scala/qscript/examples/ExampleUnifiedGenotyper.scala b/scala/qscript/examples/ExampleUnifiedGenotyper.scala index f320cdcda..8c4f803e2 100644 --- a/scala/qscript/examples/ExampleUnifiedGenotyper.scala +++ b/scala/qscript/examples/ExampleUnifiedGenotyper.scala @@ -58,13 +58,13 @@ class ExampleUnifiedGenotyper extends QScript { // If you are running this on a compute farm, make sure that the Sting/shell // folder is in your path to use mergeText.sh and splitIntervals.sh. 
genotyper.scatterCount = 3 - genotyper.input_file :+= qscript.bamFile - genotyper.variants_out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf") + genotyper.input_file :+= qscript.bamFile.toNamedFile + genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf") - evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out) - evalUnfiltered.out = swapExt(genotyper.variants_out, "vcf", "eval") + evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.out) + evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval") - variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out) + variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out) variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf") variantFilter.filterName = filterNames variantFilter.filterExpression = filterExpressions diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFile.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFile.scala new file mode 100644 index 000000000..c5b3917e8 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFile.scala @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import java.io.File +import org.broadinstitute.sting.queue.function.FileProvider + +/** + * Used to provide -I input_file arguments to the GATK. + */ +class NamedFile(var file: File, var name: String = null) extends FileProvider { + require(file != null, "NamedFile file cannot be null") +} + +/** + * Used to provide -I input_file arguments to the GATK. + */ +object NamedFile { + /** + * Formats the named file on the command line. + * Used for optional and repeat. + * @param cmdLineParam command line parameter, ex: -I + * @param prefix unused + * @param value NamedFile to add. + * @param suffix unused + * @return The command line addition. 
+ */ + def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String) = { + value match { + case namedFile: NamedFile => + if (namedFile.name != null) + " %s:%s %s".format(cmdLineParam, namedFile.name, namedFile.file) + else + " %s %s".format(cmdLineParam, namedFile.file) + } + } +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFileWrapper.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFileWrapper.scala new file mode 100644 index 000000000..9041170ff --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/NamedFileWrapper.scala @@ -0,0 +1,9 @@ +package org.broadinstitute.sting.queue.extensions.gatk + +import java.io.File +import org.broadinstitute.sting.queue.function.FileProvider + +class NamedFileWrapper(private val file: File) { + def toNamedFile = new NamedFile(file) + def toNamedFile(name: String) = new NamedFile(file, name) +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala index bce054ba0..3cc5f4b33 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala @@ -4,11 +4,31 @@ import java.io.File import org.broadinstitute.sting.queue.function.FileProvider /** - * Used to provide -B rodBinding arguments to the GATK. + * Used to provide -B rodBind arguments to the GATK. */ case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider { require(trackName != null, "RodBind trackName cannot be null") require(trackType != null, "RodBind trackType cannot be null") require(file != null, "RodBind file cannot be null") - override def toString = "%s,%s,%s".format(trackName, trackType, file) +} + +/** + * Used to provide -B rodBind arguments to the GATK. 
+ */ +object RodBind { + /** + * Formats the rod binding on the command line. + * Used for optional and repeat. + * @param cmdLineParam command line parameter, ex: -B + * @param prefix unused + * @param value RodBind to add. + * @param suffix unused + * @return The command line addition. + */ + def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String) = { + value match { + case rodBind: RodBind => + " %s:%s,%s %s".format(cmdLineParam, rodBind.trackName, rodBind.trackType, rodBind.file) + } + } } diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/package.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/package.scala new file mode 100644 index 000000000..f5ed0e38c --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/package.scala @@ -0,0 +1,13 @@ +package org.broadinstitute.sting.queue.extensions + +import java.io.File +import org.broadinstitute.sting.queue.extensions.gatk.NamedFile +import org.broadinstitute.sting.queue.extensions.gatk.NamedFileWrapper + +package object gatk { + implicit def fileToNamedFileWrapper(file: File) = new NamedFileWrapper(file) + // TODO: Get the syntax right so that the implicits kick in for a generic type, ex: Traversable[File], Traversable[_ <: File], etc. + // but need to return the same outer type, so T <: Traversable[File] : T[NamedFile], T <: Traversable[_ <: File]: T[NamedFile], etc. 
+ implicit def filesToNamedFilesWrapper(files: List[File]) = files.map(file => if (file == null) null else new NamedFile(file)) + implicit def filesToNamedFilesWrapper(files: Set[File]) = files.map(file => if (file == null) null else new NamedFile(file)) +} diff --git a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index 6f0858715..55c10ffda 100644 --- a/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -262,23 +262,25 @@ trait CommandLineFunction extends QFunction with Logging { * @param params Traversable parameters. * @param suffix Optional suffix per parameter. * @param separator Optional separator per parameter. - * @param format Format string if the value has a value + * @param format Format function if the value has a value * @return The generated string */ - protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", format: String = "%s") = - params.filter(param => hasValue(param)).map(param => prefix + toValue(param, format) + suffix).mkString(separator) + protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", + format: (String, Any, String) => String = formatValue("%s")) = + params.filter(param => hasValue(param)).map(param => format(prefix, param, suffix)).mkString(separator) /** * Returns parameter with a prefix/suffix if it is set otherwise returns "". * Does not output null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString. * @param prefix Command line prefix per parameter. - * @param param Parameters to check for a value. + * @param param Parameter to check for a value. * @param suffix Optional suffix per parameter. 
- * @param format Format string if the value has a value + * @param format Format function if the value has a value * @return The generated string */ - protected def optional(prefix: String, param: Any, suffix: String = "", format: String = "%s") = - if (hasValue(param)) prefix + toValue(param, format) + suffix else "" + protected def optional(prefix: String, param: Any, suffix: String = "", + format: (String, Any, String) => String = formatValue("%s")) = + if (hasValue(param)) format(prefix, param, suffix) else "" /** * Returns fields that do not have values which are required. @@ -365,15 +367,20 @@ trait CommandLineFunction extends QFunction with Logging { /** * Returns "" if the value is null or an empty collection, otherwise return the value.toString. - * @param value Value to test for null, or a collection to test if it is empty. * @param format Format string if the value has a value + * @param prefix Command line prefix per parameter. + * @param param Parameter to check for a value. + * @param suffix Optional suffix per parameter. * @return "" if the value is null, or "" if the collection is empty, otherwise the value.toString. */ - private def toValue(param: Any, format: String): String = if (CollectionUtils.isNullOrEmpty(param)) "" else - param match { - case Some(x) => format.format(x) - case x => format.format(x) - } + protected def formatValue(format: String)(prefix: String, param: Any, suffix: String): String = + if (CollectionUtils.isNullOrEmpty(param)) + "" + else + prefix + (param match { + case Some(x) => format.format(x) + case x => format.format(x) + }) + suffix /** * Gets the value of a field.