Better DOT support and updated recalibration pipeline

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3811 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-07-16 20:54:51 +00:00
parent 36ac73cf9a
commit b0fc42906e
4 changed files with 14 additions and 4 deletions

View File

@ -1,3 +1,4 @@
import java.io.File
import org.broadinstitute.sting.queue.QScript._
import org.apache.commons.io.FilenameUtils;
// Other imports can be added here
@ -35,16 +36,22 @@ setParams
// Run the pipeline.
// NOTE(review): `run` is not defined in the visible part of this script; presumably it is
// brought in by the QScript._ wildcard import -- confirm.
run
/**
 * Returns the index file that sits next to a BAM file: the BAM's path with ".bai" appended.
 *
 * The path is taken explicitly from `getPath` instead of relying on the implicit
 * any-to-string conversion that `file + "..."` needs (deprecated in newer Scala versions).
 *
 * @param bam the BAM file
 * @return a File whose path is the path of `bam` followed by ".bai"
 */
def bai(bam: File): File = new File(bam.getPath + ".bai")
/**
 * Pipeline function that runs `samtools index` on a BAM file.
 *
 * The BAM is declared as the input and the matching ".bai" file (see `bai`) as the output.
 *
 * @param bamIn the BAM file to index
 */
class Index(bamIn: File) extends GatkFunction {
  @Input(doc="foo")
  var bam = bamIn

  @Output(doc="foo")
  var bamIndex = bai(bamIn)

  // NOTE(review): unit of memoryLimit is not visible here (presumably gigabytes) -- confirm.
  memoryLimit = Some(1)

  /** Label for this function, built from the BAM's file name (no directory). */
  override def dotString: String = {
    val bamName = bamIn.getName
    "Index: %s".format(bamName)
  }

  /** The shell command to execute: samtools index on the input BAM. */
  def commandLine: String = {
    val template = "samtools index %s"
    template.format(bam)
  }
}
/**
 * Pipeline function that runs the GATK CountCovariates walker on a BAM file and writes
 * a recalibration data file.
 *
 * Inputs are the BAM and its ".bai" index; the output is the recalibration file.
 *
 * @param bamIn       the BAM file to analyze
 * @param recalDataIn the recalibration file to write
 * @param args        extra text inserted verbatim right after the base GATK command line
 *                    (no separator is added, so callers supply their own leading space)
 */
class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction {
  @Input(doc="foo")
  var bam = bamIn

  @Input(doc="foo")
  var bamIndex = bai(bamIn)

  @Output(doc="foo")
  var recalData = recalDataIn

  // NOTE(review): unit of memoryLimit is not visible here (presumably gigabytes) -- confirm.
  memoryLimit = Some(4)

  /** Label for this function: the BAM's file name plus the extra arguments. */
  override def dotString: String = {
    val bamName = bamIn.getName
    "CountCovariates: %s [args %s]".format(bamName, args)
  }

  /** Base GATK command line, then the caller's args, then the fixed walker options. */
  def commandLine: String = {
    val invocation = gatkCommandLine("CountCovariates") + args
    val walkerOptions = " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData)
    invocation + walkerOptions
  }
}
@ -53,6 +60,7 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args:
@Input(doc="foo") var recalData = recalDataIn
@Gather(classOf[BamGatherFunction])
@Output(doc="foo") var bamOut = bamOutArg
/** Label for this function: input BAM name, output BAM name and the extra arguments. */
override def dotString: String = {
  val sourceName = bamInArg.getName
  val targetName = bamOutArg.getName
  "TableRecalibrate: %s => %s [args %s]".format(sourceName, targetName, args)
}
memoryLimit = Some(2)
/**
 * Base GATK command line for TableRecalibration, then the caller's args, then the fixed
 * options naming the input BAM, the recalibration file and the output BAM.
 */
def commandLine: String = {
  val invocation = gatkCommandLine("TableRecalibration") + args
  val walkerOptions = " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut)
  invocation + walkerOptions
}
}
@ -60,5 +68,6 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args:
/**
 * Pipeline function that launches the AnalyzeCovariates jar on a recalibration file.
 *
 * Only the recalibration file is declared as an input; the output directory is passed
 * straight through to the command line.
 *
 * @param recalDataIn the recalibration file to analyze
 * @param outputDir   directory handed to the tool via -outputDir
 */
class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction {
  @Input(doc="foo")
  var recalData = recalDataIn

  // NOTE(review): unit of memoryLimit is not visible here; the command below uses -Xmx4g,
  // so this is presumably gigabytes -- confirm.
  memoryLimit = Some(4)

  /** Label for this function, built from the recalibration file's name. */
  override def dotString: String = {
    val recalName = recalDataIn.getName
    "AnalyzeCovariates: %s".format(recalName)
  }

  /** Full java invocation of the AnalyzeCovariates jar with its fixed resource paths. */
  def commandLine: String = {
    val template = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript"
    template.format(recalData, outputDir)
  }
}

View File

@ -157,10 +157,7 @@ class QGraph extends Logging {
// todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
// todo -- visualizeString() command, or something that shows inputs / outputs
// Edge labels come from each function's own dotString, so every QFunction decides how it
// is shown. Only one getEdgeName may be defined here: a second definition with the same
// signature does not compile.
val ve = new org.jgrapht.ext.EdgeNameProvider[QFunction] {
  def getEdgeName( function: QFunction ): String = function.dotString
}
//val iterator = new TopologicalOrderIterator(qGraph.jobGraph)

View File

@ -57,6 +57,8 @@ trait DispatchFunction extends InputOutputFunction {
super.freeze
}
/** Label for this function: its jobName, then " => ", then its commandLine. */
override def dotString: String = {
  val name = jobName
  val command = commandLine
  name + " => " + command
}
/**
* Override the canon function to change any relative path to an absolute path.
*/

View File

@ -22,4 +22,6 @@ trait QFunction {
* Set of outputs for this function.
*/
def outputs: Set[Any]
/**
 * Text label for this function; empty unless an implementation overrides it.
 */
def dotString: String = ""
}