Better DOT support and updated recalibration pipeline

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3811 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-07-16 20:54:51 +00:00
parent 36ac73cf9a
commit b0fc42906e
4 changed files with 14 additions and 4 deletions

View File

@ -1,3 +1,4 @@
import java.io.File
import org.broadinstitute.sting.queue.QScript._ import org.broadinstitute.sting.queue.QScript._
import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.FilenameUtils;
// Other imports can be added here // Other imports can be added here
@ -35,16 +36,22 @@ setParams
// Run the pipeline // Run the pipeline
run run
// Returns the path of the index that accompanies a BAM file: the BAM's own path
// with ".bai" appended (sample.bam -> sample.bam.bai). Nothing is read from disk.
// Built with format(), like the command lines in this script, instead of
// `File + String`, which only compiles through the any2stringadd implicit conversion.
def bai(bam: File): File = new File("%s.bai".format(bam))
// Queue function that indexes a BAM file by running `samtools index` on it.
//   bamIn - the BAM file to index.
class Index(bamIn: File) extends GatkFunction { class Index(bamIn: File) extends GatkFunction {
@Input(doc="foo") var bam = bamIn @Input(doc="foo") var bam = bamIn
// The index path is bai(bamIn), i.e. the BAM path with ".bai" appended.
// NOTE(review): declaring it as @Output presumably lets Queue schedule functions that
// take the same path as @Input after this one -- confirm against the Queue docs.
@Output(doc="foo") var bamIndex = bai(bamIn)
// NOTE(review): units of memoryLimit are not visible here (presumably gigabytes) -- confirm.
memoryLimit = Some(1) memoryLimit = Some(1)
// Label for this function in the job graph: "Index: " plus the BAM's file name (no directory).
override def dotString = "Index: %s".format(bamIn.getName)
// Only the BAM path is passed to samtools; bamIndex is not placed on the command line.
def commandLine = "samtools index %s".format(bam) def commandLine = "samtools index %s".format(bam)
} }
// Queue function that runs the GATK CountCovariates walker on a BAM file and writes
// the result to a recalibration data file.
//   bamIn       - the BAM file passed to -I.
//   recalDataIn - the file passed to -recalFile; declared as this function's @Output.
//   args        - extra command-line text inserted right after gatkCommandLine(...); defaults to "".
class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction { class CountCovariates(bamIn: File, recalDataIn: File, args: String = "") extends GatkFunction {
@Input(doc="foo") var bam = bamIn @Input(doc="foo") var bam = bamIn
// The BAM's index (bai(bamIn)) is declared as an input even though it never appears on the
// command line. NOTE(review): presumably this makes the function wait for Index -- confirm.
@Input(doc="foo") var bamIndex = bai(bamIn)
@Output(doc="foo") var recalData = recalDataIn @Output(doc="foo") var recalData = recalDataIn
// NOTE(review): units of memoryLimit are not visible here (presumably gigabytes) -- confirm.
memoryLimit = Some(4) memoryLimit = Some(4)
// Label for this function in the job graph: the BAM's file name plus the extra args.
override def dotString = "CountCovariates: %s [args %s]".format(bamIn.getName, args)
// format() binds to the trailing string literal only, so it fills in -I and -recalFile and
// never interprets '%' characters inside args.
// NOTE(review): args is concatenated with no separator on either side of it; the literal
// starts with a space, but verify that gatkCommandLine(...) or args supplies the other one.
// NOTE(review): the dbSNP rod path passed to -D is hard-coded to a site-specific location.
def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData) def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData)
} }
@ -53,6 +60,7 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args:
@Input(doc="foo") var recalData = recalDataIn @Input(doc="foo") var recalData = recalDataIn
@Gather(classOf[BamGatherFunction]) @Gather(classOf[BamGatherFunction])
@Output(doc="foo") var bamOut = bamOutArg @Output(doc="foo") var bamOut = bamOutArg
// Label for this function in the job graph: input BAM name, output BAM name (file names
// only, no directories) and the extra args.
override def dotString = {
  val label = "TableRecalibrate: %s => %s [args %s]"
  label.format(bamInArg.getName, bamOutArg.getName, args)
}
memoryLimit = Some(2) memoryLimit = Some(2)
// Command line for the GATK TableRecalibration walker: gatkCommandLine(...) followed by args
// and the -I / -recalFile / -outputBam options. format() binds to the trailing literal only.
// NOTE(review): args is concatenated with no separator before it -- verify spacing.
def commandLine = gatkCommandLine("TableRecalibration") + args + " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut) // bamOut.getPath()) def commandLine = gatkCommandLine("TableRecalibration") + args + " -l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut) // bamOut.getPath())
} }
@ -60,5 +68,6 @@ class TableRecalibrate(bamInArg: File, recalDataIn: File, bamOutArg: File, args:
// Queue function that runs the stand-alone AnalyzeCovariates jar on a recalibration data file.
//   recalDataIn - the file passed to -recalFile; declared as this function's @Input.
//   outputDir   - the directory passed to -outputDir. It is used on the command line only and
//                 is not annotated as an @Output.
class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction { class AnalyzeCovariates(recalDataIn: File, outputDir: File) extends GatkFunction {
@Input(doc="foo") var recalData = recalDataIn @Input(doc="foo") var recalData = recalDataIn
// NOTE(review): presumably gigabytes, matching the -Xmx4g below -- confirm.
memoryLimit = Some(4) memoryLimit = Some(4)
// Label for this function in the job graph: the recalibration file's name (no directory).
override def dotString = "AnalyzeCovariates: %s".format(recalDataIn.getName)
// Invokes java directly rather than going through gatkCommandLine.
// NOTE(review): the jar, the -resources directory and the Rscript binary are absolute,
// user- and site-specific paths.
def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript".format(recalData, outputDir) def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5 -Rscript /broad/tools/apps/R-2.6.0/bin/Rscript".format(recalData, outputDir)
} }

View File

@ -157,10 +157,7 @@ class QGraph extends Logging {
// todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a // todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
// todo -- visualizeString() command, or something that shows inputs / outputs // todo -- visualizeString() command, or something that shows inputs / outputs
// Supplies the name jgrapht attaches to each edge (a QFunction) of the job graph.
// NOTE(review): presumably consumed by the DOT export of this graph -- confirm against the
// code that uses `ve`, which is outside this hunk.
val ve = new org.jgrapht.ext.EdgeNameProvider[QFunction] { val ve = new org.jgrapht.ext.EdgeNameProvider[QFunction] {
def getEdgeName( function: QFunction ) = function match { def getEdgeName( function: QFunction ) = function.dotString
case f: DispatchFunction => f.jobName + " => " + f.commandLine
case _ => ""
}
} }
//val iterator = new TopologicalOrderIterator(qGraph.jobGraph) //val iterator = new TopologicalOrderIterator(qGraph.jobGraph)

View File

@ -57,6 +57,8 @@ trait DispatchFunction extends InputOutputFunction {
super.freeze super.freeze
} }
// Label for a dispatched function in the job graph: its job name, then " => ", then the
// command line it runs.
override def dotString = "%s => %s".format(jobName, commandLine)
/** /**
* Override the canon function to change any relative path to an absolute path. * Override the canon function to change any relative path to an absolute path.
*/ */

View File

@ -22,4 +22,6 @@ trait QFunction {
* Set of outputs for this function. * Set of outputs for this function.
*/ */
def outputs: Set[Any] def outputs: Set[Any]
/**
 * Text that describes this function wherever a label for it is needed,
 * such as the name of its edge in the job graph.
 * Defaults to the empty string; implementations override it to describe themselves.
 */
def dotString: String = ""
} }