Updated Queue GATK generation to reflect -B and -I changes.

To add support for "-I:tumor tumor.bam", the GATK argument
input_file (-I) is now generated as a List of NamedFile objects.
The syntactic sugar is not fully working yet.  To activate it, import the
gatk package.  This effectively adds a new method to java.io.File
called toNamedFile.  When adding a file to the list call
  countReads.input_file :+= myJavaFile.toNamedFile
See scala/qscript/examples for actual examples.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4122 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-08-25 22:17:36 +00:00
parent bdb3a7ebe6
commit 0105e8d063
10 changed files with 153 additions and 46 deletions

View File

@ -46,6 +46,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override protected boolean isRequired() { return argumentDefinition.required; }
@Override protected String getExclusiveOf() { return escape(argumentDefinition.exclusiveOf); }
@Override protected String getValidation() { return escape(argumentDefinition.validation); }
protected boolean isFlag() { return argumentDefinition.isFlag; }
protected boolean isMultiValued() { return argumentDefinition.isMultiValued; }
protected final String getShortFieldGetter() { return getFieldName(getRawShortFieldName()); }
protected final String getShortFieldSetter() { return getFieldName(getRawShortFieldName() + "_="); }
@ -75,8 +77,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
protected static final String REQUIRED_TEMPLATE = " + \" %1$s \" + %2$s.format(%3$s)";
protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=%2$s)";
protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=%2$s)";
protected static final String REPEAT_TEMPLATE = " + repeat(\" %1$s \", %3$s, format=formatValue(%2$s))";
protected static final String OPTIONAL_TEMPLATE = " + optional(\" %1$s \", %3$s, format=formatValue(%2$s))";
protected static final String FLAG_TEMPLATE = " + (if (%3$s) \" %1$s\" else \"\")";
public final String getCommandLineAddition() {
@ -99,7 +101,10 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
protected String getCommandLineTemplate() {
return isRequired() ? REQUIRED_TEMPLATE : OPTIONAL_TEMPLATE;
if (isFlag()) return FLAG_TEMPLATE;
if (isMultiValued()) return REPEAT_TEMPLATE;
if (isRequired()) return REQUIRED_TEMPLATE;
return OPTIONAL_TEMPLATE;
}
public static List<? extends ArgumentField> getArgumentFields(Class<?> classType) {
@ -122,11 +127,11 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
// ROD Bindings are set by the RodBindField
} else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
// TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
return Collections.singletonList(new RodBindArgumentField(argumentDefinition, argumentDefinition.required));
return Collections.singletonList(new RodBindArgumentField(argumentDefinition));
//return Collections.<ArgumentField>emptyList();
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputArgumentField(argumentDefinition), new IndexFilesField());
return Arrays.asList(new InputNamedFileDefinitionField(argumentDefinition), new IndexFilesField());
} else if (argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Collections.singletonList(new InputArgumentField(argumentDefinition));
@ -219,14 +224,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected Class<?> getInnerType() { return File.class; }
@Override protected String getFieldType() { return String.format(isMultiValued() ? "List[%s]" : "%s", getRawFieldType()); }
@Override protected String getFieldType() { return isMultiValued() ? "List[File]" : "File"; }
@Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
@Override protected String getCommandLineTemplate() {
return isMultiValued() ? REPEAT_TEMPLATE : super.getCommandLineTemplate();
}
protected String getRawFieldType() { return "File"; }
protected boolean isMultiValued() { return argumentDefinition.isMultiValued; }
}
// if (argumentDefinition.ioType == ArgumentIOType.OUTPUT)
@ -313,15 +312,31 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
/**
* The other extreme of a NamedRodBindingField, allows the user to specify the track name, track type, and the file.
*/
public static class RodBindArgumentField extends InputArgumentField {
private boolean isRequired;
public RodBindArgumentField(ArgumentDefinition argumentDefinition, boolean isRequired) {
public static class RodBindArgumentField extends ArgumentDefinitionField {
public RodBindArgumentField(ArgumentDefinition argumentDefinition) {
super(argumentDefinition);
this.isRequired = isRequired;
}
@Override protected Class<?> getInnerType() { return null; } // RodBind does not need to be imported.
@Override protected String getFieldType() { return "List[RodBind]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected String getCommandLineTemplate() {
return " + repeat(\"\", %3$s, format=RodBind.formatCommandLine(\"%1$s\"))";
}
}
@Override protected boolean isRequired() { return this.isRequired; }
@Override protected String getRawFieldType() { return "RodBind"; }
/**
 * Argument field generated for the named input_file argument.
 * The generated scala field is a List[NamedFile] that defaults to Nil.
 */
public static class InputNamedFileDefinitionField extends ArgumentDefinitionField {
    /** Scala type of the generated field. */
    private static final String FIELD_TYPE = "List[NamedFile]";

    /** Scala default value of the generated field. */
    private static final String DEFAULT_VALUE = "Nil";

    /**
     * Command line template: %3$s is passed to repeat as the parameters and
     * %1$s is passed to NamedFile.formatCommandLine.
     */
    private static final String COMMAND_LINE_TEMPLATE =
            " + repeat(\"\", %3$s, format=NamedFile.formatCommandLine(\"%1$s\"))";

    public InputNamedFileDefinitionField(ArgumentDefinition argumentDefinition) {
        super(argumentDefinition);
    }

    /** @return null, since NamedFile does not need to be imported. */
    @Override
    protected Class<?> getInnerType() {
        return null;
    }

    /** @return The scala type of the field. */
    @Override
    protected String getFieldType() {
        return FIELD_TYPE;
    }

    /** @return The scala default value. */
    @Override
    protected String getDefaultValue() {
        return DEFAULT_VALUE;
    }

    /** @return The template that delegates formatting of each element to NamedFile.formatCommandLine. */
    @Override
    protected String getCommandLineTemplate() {
        return COMMAND_LINE_TEMPLATE;
    }
}
/**
@ -339,8 +354,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override protected String getRawFieldName() { return "index_files"; }
@Override protected String getFreezeFields() {
return String.format(
"index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" +
"index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n");
"index_files ++= input_file.filter(bam => bam != null && bam.file.getName.endsWith(\".bam\")).map(bam => new File(bam.file.getPath + \".bai\"))%n" +
"index_files ++= input_file.filter(sam => sam != null && sam.file.getName.endsWith(\".sam\")).map(sam => new File(sam.file.getPath + \".sai\"))%n");
}
}

View File

@ -116,15 +116,23 @@ public abstract class ArgumentField {
/** @return The scala default value. */
protected abstract String getDefaultValue();
/** @return The class of the field, or the component type if the scala field is a collection. */
/** @return The class of the field, or the component type if the scala
* field is a collection, or null if no type needs to be imported.
* NOTE: Used in some cases by getFieldType, so the two functions should be overridden together.
*/
protected abstract Class<?> getInnerType();
/** @return A custom command for overriding freeze. */
protected String getFreezeFields() { return ""; }
@SuppressWarnings("unchecked")
/** @return Classes that should be imported. */
protected Collection<Class<?>> getImportClasses() {
return Arrays.asList(this.getInnerType(), getAnnotationIOClass());
ArrayList<Class<?>> importClasses = new ArrayList<Class<?>>();
importClasses.add(this.getAnnotationIOClass());
Class<?> innerType = this.getInnerType();
if (innerType != null)
importClasses.add(innerType);
return importClasses;
}
/** @return True if this field uses @Scatter. */

View File

@ -65,8 +65,8 @@ public class RodBindField extends ArgumentField {
@Override public String getCommandLineAddition() {
return String.format(this.useOption()
? " + optional(\" -B %s,%s,\", %s)"
: " + \" -B %s,%s,\" + %s",
? " + optional(\" -B:%s,%s \", %s)"
: " + \" -B:%s,%s \" + %s",
this.trackName, this.typeName, getFieldName());
}

View File

@ -55,7 +55,7 @@ class ExampleCountReads extends QScript {
singleCountReads.jarFile = gatkJar
singleCountReads.reference_sequence = referenceFile
// ':+' is the scala List append operator
singleCountReads.input_file :+= bamFile
singleCountReads.input_file :+= bamFile.toNamedFile
add(singleCountReads)
}
}

View File

@ -58,13 +58,13 @@ class ExampleUnifiedGenotyper extends QScript {
// If you are running this on a compute farm, make sure that the Sting/shell
// folder is in your path to use mergeText.sh and splitIntervals.sh.
genotyper.scatterCount = 3
genotyper.input_file :+= qscript.bamFile
genotyper.variants_out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
genotyper.input_file :+= qscript.bamFile.toNamedFile
genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out)
evalUnfiltered.out = swapExt(genotyper.variants_out, "vcf", "eval")
evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.variants_out)
variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out)
variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
variantFilter.filterName = filterNames
variantFilter.filterExpression = filterExpressions

View File

@ -0,0 +1,35 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
/**
* Used to provide -I input_file arguments to the GATK.
* @param file The file to pass to the GATK. Construction fails if it is null.
* @param name Optional name for the file, null by default.
*/
class NamedFile(var file: File, var name: String = null) extends FileProvider {
// Reject a null file at construction time.
require(file != null, "NamedFile file cannot be null")
}
/**
 * Command line formatting for NamedFile values passed as -I input_file arguments to the GATK.
 */
object NamedFile {
  /**
   * Formats a named file on the command line.
   * Used as the format function for optional and repeat.
   * Produces " <param>:<name> <file>" when the file has a name, otherwise " <param> <file>".
   * @param cmdLineParam command line parameter, ex: -I
   * @param prefix unused
   * @param value NamedFile to add.
   * @param suffix unused
   * @return The command line addition.
   */
  def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String) = {
    value match {
      case namedFile: NamedFile =>
        Option(namedFile.name) match {
          case Some(tag) => " %s:%s %s".format(cmdLineParam, tag, namedFile.file)
          case None => " %s %s".format(cmdLineParam, namedFile.file)
        }
    }
  }
}

View File

@ -0,0 +1,9 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
/**
 * Wraps a java.io.File so that it can be converted to a NamedFile.
 * @param file The file to wrap.
 */
class NamedFileWrapper(private val file: File) {
  /** @return A NamedFile for the wrapped file, using the default (null) name. */
  def toNamedFile: NamedFile = new NamedFile(file)

  /**
   * @param name Name to attach to the file.
   * @return A NamedFile for the wrapped file with the given name.
   */
  def toNamedFile(name: String): NamedFile = new NamedFile(file, name)
}

View File

@ -4,11 +4,31 @@ import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
/**
* Used to provide -B rodBinding arguments to the GATK.
* Used to provide -B rodBind arguments to the GATK.
*/
case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider {
require(trackName != null, "RodBind trackName cannot be null")
require(trackType != null, "RodBind trackType cannot be null")
require(file != null, "RodBind file cannot be null")
override def toString = "%s,%s,%s".format(trackName, trackType, file)
}
/**
 * Command line formatting for RodBind values passed as -B rodBind arguments to the GATK.
 */
object RodBind {
  /**
   * Formats a rod binding on the command line as " <param>:<trackName>,<trackType> <file>".
   * Used as the format function for optional and repeat.
   * @param cmdLineParam command line parameter, ex: -B
   * @param prefix unused
   * @param value RodBind to add.
   * @param suffix unused
   * @return The command line addition.
   */
  def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String) = {
    value match {
      case binding: RodBind =>
        // Build the "name,type" tag first, then attach it to the parameter.
        val trackTag = "%s,%s".format(binding.trackName, binding.trackType)
        " %s:%s %s".format(cmdLineParam, trackTag, binding.file)
    }
  }
}

View File

@ -0,0 +1,13 @@
package org.broadinstitute.sting.queue.extensions
import java.io.File
import org.broadinstitute.sting.queue.extensions.gatk.NamedFile
import org.broadinstitute.sting.queue.extensions.gatk.NamedFileWrapper
package object gatk {
// Implicit view wrapping a java.io.File in a NamedFileWrapper, which provides toNamedFile.
implicit def fileToNamedFileWrapper(file: File) = new NamedFileWrapper(file)
// TODO: Get the syntax right so that the implicits kick in for a generic type, ex: Traversable[File], Traversable[_ <: File], etc.
// but need to return the same outer type, so T <: Traversable[File] : T[NamedFile], T <: Traversable[_ <: File]: T[NamedFile], etc.
// Until then only List[File] and Set[File] are converted; null elements are passed through as null.
implicit def filesToNamedFilesWrapper(files: List[File]) = files.map(file => if (file == null) null else new NamedFile(file))
implicit def filesToNamedFilesWrapper(files: Set[File]) = files.map(file => if (file == null) null else new NamedFile(file))
}

View File

@ -262,23 +262,25 @@ trait CommandLineFunction extends QFunction with Logging {
* @param params Traversable parameters.
* @param suffix Optional suffix per parameter.
* @param separator Optional separator per parameter.
* @param format Format string if the value has a value
* @param format Format function if the value has a value
* @return The generated string
*/
protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", format: String = "%s") =
params.filter(param => hasValue(param)).map(param => prefix + toValue(param, format) + suffix).mkString(separator)
protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "",
                     format: (String, Any, String) => String = formatValue("%s")) = {
  // Keep only the parameters that have a value.
  val present = params.filter(hasValue(_))
  // The format function is responsible for applying the prefix and suffix.
  val rendered = present.map(format(prefix, _, suffix))
  rendered.mkString(separator)
}
/**
* Returns parameter with a prefix/suffix if it is set otherwise returns "".
* Does not output null, Nil, None. Unwraps Some(x) to x. Everything else is called with x.toString.
* @param prefix Command line prefix per parameter.
* @param param Parameters to check for a value.
* @param param Parameter to check for a value.
* @param suffix Optional suffix per parameter.
* @param format Format string if the value has a value
* @param format Format function if the value has a value
* @return The generated string
*/
protected def optional(prefix: String, param: Any, suffix: String = "", format: String = "%s") =
if (hasValue(param)) prefix + toValue(param, format) + suffix else ""
protected def optional(prefix: String, param: Any, suffix: String = "",
                       format: (String, Any, String) => String = formatValue("%s")) = {
  hasValue(param) match {
    // The format function is responsible for applying the prefix and suffix.
    case true => format(prefix, param, suffix)
    // Parameters without a value add nothing.
    case false => ""
  }
}
/**
* Returns fields that do not have values which are required.
@ -365,15 +367,20 @@ trait CommandLineFunction extends QFunction with Logging {
/**
* Returns "" if the value is null or an empty collection, otherwise return the value.toString.
* @param value Value to test for null, or a collection to test if it is empty.
* @param format Format string if the value has a value
* @param prefix Command line prefix per parameter.
* @param param Parameter to check for a value.
* @param suffix Optional suffix per parameter.
* @return "" if the value is null, or "" if the collection is empty, otherwise the value.toString.
*/
private def toValue(param: Any, format: String): String = if (CollectionUtils.isNullOrEmpty(param)) "" else
param match {
case Some(x) => format.format(x)
case x => format.format(x)
}
protected def formatValue(format: String)(prefix: String, param: Any, suffix: String): String = {
  if (CollectionUtils.isNullOrEmpty(param)) {
    // Nothing to format, so neither the prefix nor the suffix is emitted.
    ""
  } else {
    // Unwrap Some(x) so that the wrapped value, not the Option, is formatted.
    val unwrapped = param match {
      case Some(inner) => inner
      case other => other
    }
    prefix + format.format(unwrapped) + suffix
  }
}
/**
* Gets the value of a field.