At chartl's superb suggestion, command line files are now all Files, instead of the old approach where some arguments were wrapper objects that "has a File". This should make reassigning them easier.

No longer generating deprecated GATK arguments in the Queue extensions.
Emitting deprecation warnings when Queue compiles QScripts, to help debug issues.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4195 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-09-02 21:30:48 +00:00
parent 0bb05fb472
commit fd5970fdd4
17 changed files with 125 additions and 127 deletions

View File

@ -110,8 +110,9 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
public static List<? extends ArgumentField> getArgumentFields(Class<?> classType) {
List<ArgumentField> argumentFields = new ArrayList<ArgumentField>();
for (ArgumentSource argumentSource: ParsingEngine.extractArgumentSources(classType))
for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
argumentFields.addAll(getArgumentFields(argumentDefinition));
if (!argumentSource.isDeprecated())
for (ArgumentDefinition argumentDefinition: argumentSource.createArgumentDefinitions())
argumentFields.addAll(getArgumentFields(argumentDefinition));
return argumentFields;
}
@ -131,7 +132,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
//return Collections.<ArgumentField>emptyList();
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputNamedFileDefinitionField(argumentDefinition), new IndexFilesField());
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new IndexFilesField());
} else if (argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Collections.singletonList(new InputArgumentField(argumentDefinition));
@ -327,15 +328,15 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
/**
* Named input_files.
*/
public static class InputNamedFileDefinitionField extends ArgumentDefinitionField {
public InputNamedFileDefinitionField(ArgumentDefinition argumentDefinition) {
public static class InputTaggedFileDefinitionField extends ArgumentDefinitionField {
public InputTaggedFileDefinitionField(ArgumentDefinition argumentDefinition) {
super(argumentDefinition);
}
@Override protected Class<?> getInnerType() { return null; } // NamedFile does not need to be imported.
@Override protected String getFieldType() { return "List[NamedFile]"; }
@Override protected Class<?> getInnerType() { return null; } // TaggedFile does not need to be imported.
@Override protected String getFieldType() { return "List[File]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected String getCommandLineTemplate() {
return " + repeat(\"\", %3$s, format=NamedFile.formatCommandLine(\"%1$s\"))";
return " + repeat(\"\", %3$s, format=TaggedFile.formatCommandLine(\"%1$s\"))";
}
}
@ -354,8 +355,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override protected String getRawFieldName() { return "index_files"; }
@Override protected String getFreezeFields() {
return String.format(
"index_files ++= input_file.filter(bam => bam != null && bam.file.getName.endsWith(\".bam\")).map(bam => new File(bam.file.getPath + \".bai\"))%n" +
"index_files ++= input_file.filter(sam => sam != null && sam.file.getName.endsWith(\".sam\")).map(sam => new File(sam.file.getPath + \".sai\"))%n");
"index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n" +
"index_files ++= input_file.filter(sam => sam != null && sam.getName.endsWith(\".sam\")).map(sam => new File(sam.getPath + \".sai\"))%n");
}
}

View File

@ -44,20 +44,20 @@ class Index(bamIn: File) extends SamtoolsIndexFunction {
class MergeBAMs(bamList: File) extends PrintReads with UNIVERSAL_GATK_ARGS {
this.memoryLimit = Some(3)
this.input_file :+= bamList.toNamedFile
this.input_file :+= bamList
this.o = new File(MERGED_DIR + "/" + bamList.getName + ".bam")
}
class Call(@Input(doc="foo") bamList: File, n: Integer, name: String) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
class Call(@Input(doc="foo") bamList: File, n: Int, name: String) extends UnifiedGenotyper with UNIVERSAL_GATK_ARGS {
@Output(doc="foo") var outVCF: File = new File("%s.%d.%s.vcf".format(bamList.getName, n, name))
this.memoryLimit = Some(4)
this.input_file :+= bamList.toNamedFile
this.input_file :+= bamList
this.jobQueue = "gsa"
this.stand_call_conf = Option(10.0)
this.o = outVCF
}
class SliceList(n: Integer) extends CommandLineFunction {
class SliceList(n: Int) extends CommandLineFunction {
@Output(doc="foo") var list: File = new File("bams.%d.list".format(n))
this.jobQueue = "gsa"
def commandLine = "head -n %d %s > %s".format(n, FULL_BAM_LIST, list)

View File

@ -55,7 +55,7 @@ class ExampleCountReads extends QScript {
singleCountReads.jarFile = gatkJar
singleCountReads.reference_sequence = referenceFile
// ':+' is the scala List append operator
singleCountReads.input_file :+= bamFile.toNamedFile
singleCountReads.input_file :+= bamFile
add(singleCountReads)
}
}

View File

@ -58,7 +58,7 @@ class ExampleUnifiedGenotyper extends QScript {
// If you are running this on a compute farm, make sure that the Sting/shell
// folder is in your path to use mergeText.sh and splitIntervals.sh.
genotyper.scatterCount = 3
genotyper.input_file :+= qscript.bamFile.toNamedFile
genotyper.input_file :+= qscript.bamFile
genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
evalUnfiltered.rodBind :+= RodBind("vcf", "VCF", genotyper.out)

View File

@ -80,7 +80,7 @@ class fullCallingPipeline extends QScript {
val cleanedBase: String = projectBase + ".cleaned"
val uncleanedBase: String = projectBase + ".uncleaned"
// there are commands that use all the bam files
var cleanBamFiles = List.empty[NamedFile]
var cleanBamFiles = List.empty[File]
for ( bam <- qscript.bamFiles ) {
@ -94,7 +94,7 @@ class fullCallingPipeline extends QScript {
// create the cleaning commands
val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs
targetCreator.input_file :+= bam.toNamedFile
targetCreator.input_file :+= bam
targetCreator.out = indel_targets
val realigner = new IndelRealigner with CommandLineGATKArgs
@ -112,7 +112,7 @@ class fullCallingPipeline extends QScript {
// put clean bams in clean genotypers
cleanBamFiles :+= realigner.out.toNamedFile
cleanBamFiles :+= realigner.out
add(targetCreator,realigner,samtoolsindex)
}
@ -122,7 +122,7 @@ class fullCallingPipeline extends QScript {
endToEnd(cleanedBase,cleanBamFiles)
}
def endToEnd(base: String, bamFiles: List[NamedFile]) = {
def endToEnd(base: String, bamFiles: List[File]) = {
// step through the un-indel-cleaned graph:
// 1a. call snps and indels
@ -158,7 +158,7 @@ class fullCallingPipeline extends QScript {
var priority = ""
for ( bam <- bamFiles ) {
val indel = new IndelGenotyperV2 with CommandLineGATKArgs
indel.input_file :+= bam.toNamedFile
indel.input_file :+= bam
indel.out = swapExt(bam,".bam",".indels.vcf")
indel.downsample_to_coverage = Some(500)
indelCallFiles :+= RodBind("v"+loopNo.toString, "VCF", indel.out)
@ -186,28 +186,28 @@ class fullCallingPipeline extends QScript {
annotated.rodBind :+= RodBind("variant", "VCF", snps.out)
annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
annotated.rodBind :+= RodBind("dbsnp", "AnnotatorInputTable", qscript.dbsnpTable)
annotated.vcfOutput = swapExt(snps.out,".vcf",".annotated.vcf").getAbsolutePath
annotated.out = swapExt(snps.out,".vcf",".annotated.vcf")
annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
annotated.rodToIntervalTrackName = "variant"
// 2.a filter on cluster and near indels
val masker = new VariantFiltration with CommandLineGATKArgs
masker.rodBind :+= RodBind("variant", "VCF", new File(annotated.vcfOutput))
masker.rodBind :+= RodBind("mask", "VCF", new File(mergeIndels.out.getAbsolutePath))
masker.rodBind :+= RodBind("variant", "VCF", annotated.out)
masker.rodBind :+= RodBind("mask", "VCF", mergeIndels.out)
masker.maskName = "NearIndel"
masker.clusterWindowSize = Some(qscript.snpClusterWindow)
masker.clusterSize = Some(qscript.snpsInCluster)
masker.out = swapExt(new File(annotated.vcfOutput),".vcf",".indel.masked.vcf")
masker.out = swapExt(annotated.out,".vcf",".indel.masked.vcf")
// 2.b hand filter with standard filter
val handFilter = new VariantFiltration with CommandLineGATKArgs
handFilter.rodBind :+= RodBind("variant", "VCF", new File(annotated.vcfOutput))
handFilter.rodBind :+= RodBind("variant", "VCF", annotated.out)
handFilter.rodBind :+= RodBind("mask", "VCF", mergeIndels.out)
handFilter.filterName ++= List("StrandBias","AlleleBalance","QualByDepth","HomopolymerRun")
handFilter.filterExpression ++= List("\"SB>=0.10\"","\"AB>=0.75\"","QD<5","\"HRun>=4\"")
handFilter.out = swapExt(new File(annotated.vcfOutput),".vcf",".handfiltered.vcf")
handFilter.out = swapExt(annotated.out,".vcf",".handfiltered.vcf")
// 3.i generate gaussian clusters on the masked vcf

View File

@ -131,16 +131,12 @@ class CleanBamFile extends QScript {
var fixedBam: File = null
if (realigner.scatterCount > 1) {
realigner.output = baseFile(".cleaned.bam")
realigner.out = baseFile(".cleaned.bam")
// While gathering run fix mates.
realigner.setupScatterFunction = {
case (scatter: IntervalScatterFunction, _) =>
scatter.splitIntervalsScript = indelRealignerScatterScript
}
realigner.gatherClass = {
case source if (source.field.getName=="output") =>
classOf[BamGatherFunction]
}
realigner.setupGatherFunction = {
case (gather: BamGatherFunction, _) =>
gather.memoryLimit = Some(4)
@ -151,9 +147,9 @@ class CleanBamFile extends QScript {
gather.mergeTextScript = mergeTextScript
}
fixedBam = realigner.output
fixedBam = realigner.out
} else {
realigner.output = baseFile(".unfixed.cleaned.bam")
realigner.out = baseFile(".unfixed.cleaned.bam")
// Explicitly run fix mates if the function won't be scattered.
var fixMates = new PicardBamJarFunction {
@ -165,7 +161,7 @@ class CleanBamFile extends QScript {
}
fixMates.memoryLimit = Some(4)
fixMates.jarFile = fixMatesJar
fixMates.unfixed = realigner.output
fixMates.unfixed = realigner.out
fixMates.fixed = baseFile(".cleaned.bam")
fixedBam = fixMates.fixed

View File

@ -64,7 +64,6 @@ class CountCovariates(bamIn: File, recalDataIn: File) extends org.broadinstitute
this.recal_file = recalDataIn
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod")
this.logging_level = "INFO"
this.max_reads_at_locus = Some(20000)
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.memoryLimit = Some(3)
@ -75,7 +74,7 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File) exten
this.jarFile = gatkJarFile
this.input_file :+= bamInArg
this.recal_file = recalDataIn
this.output_bam = bamOutArg
this.out = bamOutArg
this.logging_level = "INFO"
this.memoryLimit = Some(2)
this.skipUQUpdate = skipUQUpdateArg

View File

@ -26,24 +26,23 @@ for (g: Int <- gList) {
gvc.logging_level = "INFO"
gvc.intervalsString :+= "20"
gvc.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
gvc.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
gvc.maxGaussians = Some(g)
gvc.shrinkage = Some(s)
gvc.shrinkageFormat = "%.6f"
gvc.dirichlet = Some(d)
gvc.dirichletFormat = "%.6f"
gvc.clusterFile = "g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b)
gvc.jobOutputFile = new File(gvc.clusterFile.stripSuffix(".cluster") + ".gvc.out")
gvc.clusterFile = new File("g%d_s%.6f_d%.6f_b%.2f.cluster".format(g,s,d,b))
gvc.jobOutputFile = swapExt(gvc.clusterFile, ".cluster", ".gvc.out")
vr.jarFile = gatkJarFile
vr.rodBind :+= RodBind("input20", "VCF", new File("/broad/shptmp/rpoplin/CEUTSI.chr20.filtered.vcf"))
vr.logging_level = "INFO"
vr.intervalsString :+= "20"
vr.target_titv = Some(2.1)
vr.target_titv = 2.1
vr.ignore_filter :+= "HARD_TO_VALIDATE"
vr.path_to_resources = "/humgen/gsa-scr1/rpoplin/sting_dev_vb/R/"
vr.clusterFile = gvc.clusterFile
vr.jobOutputFile = new File(vr.clusterFile.stripSuffix(".cluster") + ".vr.out")
vr.jobOutputFile = swapExt(vr.clusterFile, ".cluster", ".vr.out")
vr.backOff = Some(b)
vr.backOffFormat = "%.2f"

View File

@ -49,6 +49,7 @@ object QScriptManager extends Logging {
val settings = new Settings((error: String) => logger.error(error))
val outdir = IOUtils.tempDir("Q-classes")
settings.deprecation.value = true
settings.outdir.value = outdir.getPath
// Set the classpath to the current class path.

View File

@ -1,35 +0,0 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
/**
 * Used to provide -I input_file arguments to the GATK.
 * Pairs a file with an optional name; the name defaults to null and the file must not be null.
 */
class NamedFile(var file: File, var name: String = null) extends FileProvider {
// Reject a null file at construction time.
require(file != null, "NamedFile file cannot be null")
}
/**
 * Command line formatting for the -I input_file arguments to the GATK.
 */
object NamedFile {
  /**
   * Formats a named input file on the command line.
   * Used for optional and repeat.
   * @param cmdLineParam command line parameter, ex: -I
   * @param prefix unused
   * @param value NamedFile to add.
   * @param suffix unused
   * @return The command line addition, with the name attached to the parameter when the name is not null.
   */
  def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String): String =
    value match {
      // A name is present: emit "param:name file".
      case named: NamedFile if named.name != null =>
        " %s:%s %s".format(cmdLineParam, named.name, named.file)
      // No name: emit "param file".
      case unnamed: NamedFile =>
        " %s %s".format(cmdLineParam, unnamed.file)
    }
}

View File

@ -1,8 +0,0 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
/**
 * Wraps a File to add conversions to NamedFile.
 */
class NamedFileWrapper(private val file: File) {
  /** Converts the wrapped file to a NamedFile, leaving the name at its default. */
  def toNamedFile: NamedFile = new NamedFile(file)

  /** Converts the wrapped file to a NamedFile with the given name. */
  def toNamedFile(name: String): NamedFile = new NamedFile(file, name)
}

View File

@ -1,21 +1,27 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileProvider
import org.broadinstitute.sting.queue.function.FileExtension
import java.lang.String
/**
* Used to provide -B rodBind arguments to the GATK.
*/
case class RodBind(var trackName: String, var trackType: String, var file: File) extends FileProvider {
class RodBind(var trackName: String, var trackType: String, path: String) extends File(path) with FileExtension {
def this(trackName: String, trackType: String, file: File) =
this(trackName, trackType, file.getPath)
require(trackName != null, "RodBind trackName cannot be null")
require(trackType != null, "RodBind trackType cannot be null")
require(file != null, "RodBind file cannot be null")
def withPath(newPath: String) = new RodBind(trackName, trackType, newPath)
}
/**
* Used to provide -B rodBind arguments to the GATK.
*/
object RodBind {
def apply(trackName: String, trackType: String, path: String) = new RodBind(trackName, trackType, path)
def apply(trackName: String, trackType: String, file: File) = new RodBind(trackName, trackType, file)
/**
* Formats the rod binding on the command line.
* Used for optional and repeat.
@ -28,7 +34,7 @@ object RodBind {
def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String) = {
value match {
case rodBind: RodBind =>
" %s:%s,%s %s".format(cmdLineParam, rodBind.trackName, rodBind.trackType, rodBind.file)
" %s:%s,%s %s".format(cmdLineParam, rodBind.trackName, rodBind.trackType, rodBind.getPath)
}
}
}

View File

@ -0,0 +1,39 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileExtension
/**
 * A File that carries a tag, used to provide tagged -I input_file arguments to the GATK.
 * @param path path of the file.
 * @param tag tag for the file.
 */
class TaggedFile(path: String, val tag: String) extends File(path) with FileExtension {
  /** Creates a TaggedFile from the path of an existing File. */
  def this(file: File, tag: String) = this(file.getPath, tag)

  /** Returns a new TaggedFile at the given path that keeps the tag of this instance. */
  def withPath(path: String): TaggedFile = new TaggedFile(path, this.tag)
}
/**
 * Factory methods and command line formatting for the -I input_file arguments to the GATK.
 */
object TaggedFile {
  /** Creates a TaggedFile for the path with the tag. */
  def apply(path: String, tag: String): TaggedFile = new TaggedFile(path, tag)

  /** Creates a TaggedFile for the path of the file with the tag. */
  def apply(file: File, tag: String): TaggedFile = new TaggedFile(file, tag)

  /**
   * Formats an input file on the command line, attaching its tag when it has one.
   * Used for optional and repeat.
   * @param cmdLineParam command line parameter, ex: -I
   * @param prefix unused
   * @param value TaggedFile or plain File to add.
   * @param suffix unused
   * @return The command line addition: " param:tag path" for a TaggedFile with a non-null tag,
   *         otherwise " param path".
   * @throws MatchError if value is not a File, including when it is null.
   */
  def formatCommandLine(cmdLineParam: String)(prefix: String, value: Any, suffix: String): String = {
    value match {
      case taggedFile: TaggedFile if (taggedFile.tag != null) =>
        " %s:%s %s".format(cmdLineParam, taggedFile.tag, taggedFile.getPath)
      // Plain Files, and TaggedFiles whose tag is null, are emitted without a tag.
      case file: File =>
        " %s %s".format(cmdLineParam, file.getPath)
    }
  }
}

View File

@ -1,13 +0,0 @@
package org.broadinstitute.sting.queue.extensions
import java.io.File
import org.broadinstitute.sting.queue.extensions.gatk.NamedFile
import org.broadinstitute.sting.queue.extensions.gatk.NamedFileWrapper
/**
 * Implicit conversions from File, List[File] and Set[File] to their NamedFile counterparts.
 */
package object gatk {
/** Wraps a File in a NamedFileWrapper so the wrapper's toNamedFile methods can be called on it. */
implicit def fileToNamedFileWrapper(file: File) = new NamedFileWrapper(file)
// TODO: Get the syntax right so that the implicits kick in for a generic type, ex: Traversable[File], Traversable[_ <: File], etc.
// but need to return the same outer type, so T <: Traversable[File] : T[NamedFile], T <: Traversable[_ <: File]: T[NamedFile], etc.
/** Maps each File in the List to a NamedFile; null elements stay null. */
implicit def filesToNamedFilesWrapper(files: List[File]) = files.map(file => if (file == null) null else new NamedFile(file))
/** Maps each File in the Set to a NamedFile; null elements stay null. */
implicit def filesToNamedFilesWrapper(files: Set[File]) = files.map(file => if (file == null) null else new NamedFile(file))
}

View File

@ -94,7 +94,7 @@ trait CommandLineFunction extends QFunction with Logging {
def outputs = getFieldFiles(outputFields)
/**
* Gets the files from the fields. The fields must be a File, a FileProvider, or a List or Set of either.
* Gets the files from the fields. The fields must be a File, a FileExtension, or a List or Set of either.
* @param fields Fields to get files.
* @return Set[File] for the fields.
*/
@ -121,7 +121,7 @@ trait CommandLineFunction extends QFunction with Logging {
}
/**
* Gets the files from the field. The field must be a File, a FileProvider, or a List or Set of either.
* Gets the files from the field. The field must be a File, a FileExtension, or a List or Set of either.
* @param fields Field to get files.
* @return Set[File] for the field.
*/
@ -136,7 +136,7 @@ trait CommandLineFunction extends QFunction with Logging {
}
/**
* Gets the file from the field. The field must be a File or a FileProvider and not a List or Set.
* Gets the file from the field. The field must be a File or a FileExtension and not a List or Set.
* @param field Field to get the file.
* @return File for the field.
*/
@ -144,17 +144,16 @@ trait CommandLineFunction extends QFunction with Logging {
fieldValueToFile(field, getFieldValue(field))
/**
* Converts the field value to a file. The field must be a File or a FileProvider.
* Converts the field value to a file. The field must be a File or a FileExtension.
* @param field Field to get the file.
* @param value Value of the File or FileProvider or null.
* @param value Value of the File or FileExtension or null.
* @return Null if value is null, otherwise the File.
* @throws QException if the value is not a File or FileProvider.
* @throws QException if the value is not a File or FileExtension.
*/
private def fieldValueToFile(field: ArgumentSource, value: Any): File = value match {
case file: File => file
case fileProvider: FileProvider => fileProvider.file
case null => null
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or implement FileProvider: %s: %s".format(field.field, unknown))
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
}
/**
@ -164,15 +163,19 @@ trait CommandLineFunction extends QFunction with Logging {
*/
def resetFieldFile(field: ArgumentSource, tempDir: File): File = {
getFieldValue(field) match {
case fileExtension: FileExtension => {
val newFile = IOUtils.resetParent(tempDir, fileExtension)
val newFileExtension = fileExtension.withPath(newFile.getPath)
setFieldValue(field, newFileExtension)
newFileExtension
}
case file: File => {
  // FileExtension values are handled by the preceding case, so anything arriving here that is
  // not exactly java.io.File is a File subclass whose extra state would be lost when the path
  // is replaced. Plain Files must be accepted; only such subclasses are rejected
  // (the same check that canon performs).
  if (file.getClass != classOf[File])
    throw new QException("Extensions of file must also extend with FileExtension so that the path can be modified.")
  val newFile = IOUtils.resetParent(tempDir, file)
  setFieldValue(field, newFile)
  newFile
}
case fileProvider: FileProvider => {
fileProvider.file = IOUtils.resetParent(tempDir, fileProvider.file)
fileProvider.file
}
case null => null
case unknown =>
throw new QException("Unable to set file from %s: %s".format(field, unknown))
@ -242,8 +245,14 @@ trait CommandLineFunction extends QFunction with Logging {
*/
protected def canon(value: Any) = {
value match {
case file: File => absolute(file)
case fileProvider: FileProvider => fileProvider.file = absolute(fileProvider.file); fileProvider
case fileExtension: FileExtension =>
val newFile = absolute(fileExtension);
val newFileExtension = fileExtension.withPath(newFile.getPath)
newFileExtension
case file: File =>
if (file.getClass != classOf[File])
throw new QException("Extensions of file must also extend with FileExtension so that the path can be modified.");
absolute(file)
case x => x
}
}

View File

@ -0,0 +1,15 @@
package org.broadinstitute.sting.queue.function
import java.io.File
/**
 * A trait for @Input or @Output CommandLineFunction fields that are extensions of files.
 * Implementations provide withPath so that a copy of the value can be made for a different path.
 */
trait FileExtension extends File {
/**
* Returns a clone of the FileExtension with the new path.
* @param newPath new path for the clone of this FileExtension
* @return a clone of the FileExtension with the new path.
*/
def withPath(newPath: String): File
}

View File

@ -1,11 +0,0 @@
package org.broadinstitute.sting.queue.function
import java.io.File
/**
 * A trait for @Input or @Output CommandLineFunction fields that are not files, but have a File that can be get/set.
 */
trait FileProvider {
/** Gets/Sets the file. */
var file: File
}