From 530a320f2876ff2c3a4e21f3f4ce922963c43fed Mon Sep 17 00:00:00 2001 From: depristo Date: Tue, 13 Jul 2010 22:46:08 +0000 Subject: [PATCH] Intermediate commit of scatter/gather recalibation pipeline git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3785 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/recalibrate.scala | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 scala/qscript/recalibrate.scala diff --git a/scala/qscript/recalibrate.scala b/scala/qscript/recalibrate.scala new file mode 100755 index 000000000..c150bc8d7 --- /dev/null +++ b/scala/qscript/recalibrate.scala @@ -0,0 +1,60 @@ +import org.broadinstitute.sting.queue.QScript._ +import org.apache.commons.io.FilenameUtils; +// Other imports can be added here + +val unusedArgs = setArgs(args) + +def runPipeline() = { + for (bamIn <- inputs(".bam")) { + val root = bamIn.getPath() + val bamRoot = FilenameUtils.removeExtension(root); + val recalData = bamRoot + ".recal_data.csv" + val recalBam = bamRoot + ".recal.bam" + val recalRecalData = bamRoot + ".recal.recal_data.csv" + //add(new CountCovariates(root, recalData, "-OQ")) + val tableRecal = new TableRecalibrate(root, recalData, recalBam) + tableRecal.intervals = new File("/humgen/gsa-hpprojects/GATK/data/chromosomes.hg18.interval_list") + tableRecal.scatterCount = 25 + add(tableRecal) + add(new Index(recalBam)) + add(new CountCovariates(recalBam, recalRecalData)) + add(new AnalyzeCovariates(recalData, recalData + ".analyzeCovariates")) + add(new AnalyzeCovariates(recalRecalData, recalRecalData + ".analyzeCovariates")) + } +} + +runPipeline() + +// Populate parameters passed in via -P +setParams + +// Run the pipeline +run + +class Index(bamIn: String) extends GatkFunction { + @Input(doc="foo") var bam = bamIn + memoryLimit = Some(1) + def commandLine = "samtools index %s".format(bam) +} + +class CountCovariates(bamIn: String, recalDataIn: String, args: String = "") extends GatkFunction { + @Input(doc="foo") var bam = bamIn + @Output(doc="foo") var recalData = recalDataIn + memoryLimit = Some(4) + def commandLine = gatkCommandLine("CountCovariates") + args + " -l INFO -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod -I %s --max_reads_at_locus 20000 -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -recalFile %s".format(bam, recalData) +} + +class TableRecalibrate(bamInArg: String, recalDataIn: String, bamOutArg: String) extends GatkFunction { + @Input(doc="foo") var bamIn = bamInArg + @Input(doc="foo") var recalData = recalDataIn + @Gather(classOf[BamGatherFunction]) + @Output(doc="foo") var bamOut = new File(bamOutArg) + memoryLimit = Some(2) + def commandLine = gatkCommandLine("TableRecalibration") + "-l INFO -I %s -recalFile %s -outputBam %s".format(bamIn, recalData, bamOut.getPath()) +} + +class AnalyzeCovariates(recalDataIn: String, outputDir: String) extends GatkFunction { + @Input(doc="foo") var recalData = recalDataIn + memoryLimit = Some(4) + def commandLine = "java -Xmx4g -jar /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/dist/AnalyzeCovariates.jar -recalFile %s -outputDir %s -resources /home/radon01/depristo/dev/GenomeAnalysisTK/trunk/R/ -ignoreQ 5".format(recalData, outputDir) +} \ No newline at end of file